Based on https://bambinos.github.io/bambi/notebooks/beta_regression.html

In [None]:
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from scipy.special import expit

In [None]:
# Select ArviZ's "arviz-darkgrid" plotting style.
az.style.use("arviz-darkgrid")

In [None]:
# Fair coin: draw 10,000 heads-probabilities from a symmetric Beta(1000, 1000)
# and plot a kernel density estimate of the draws.
alpha, beta = 1_000, 1_000
p = np.random.beta(a=alpha, b=beta, size=10_000)
az.plot_kde(p)
plt.xlabel("$p$");

In [None]:
# Intercept-only beta regression on the simulated probabilities.
data = pd.DataFrame(data={"probabilities": p})
model = bmb.Model(formula="probabilities ~ 1", data=data, family="beta")
fitted = model.fit()

In [None]:
# Trace plot for the intercept-only fit (the trailing semicolon keeps the
# notebook from echoing the returned object).
az.plot_trace(fitted);

In [None]:
# Tabular posterior summary of the intercept-only fit.
az.summary(fitted)

In [None]:
def mukappa_to_alphabeta(mu, kappa, hdi_prob=0.95):
    """Summarise Beta shape parameters implied by posterior draws of mu and kappa.

    The shape parameters are computed as ``alpha = mu * kappa`` and
    ``beta = (1 - mu) * kappa``.

    Parameters
    ----------
    mu, kappa
        xarray objects holding posterior draws. They must carry "chain" and
        "draw" dimensions and reduce to a single value once averaged over
        those two dimensions (``.item()`` is called on the result).
    hdi_prob : float, default 0.95
        Probability mass of the highest-density interval to report.

    Returns
    -------
    tuple
        ``(alpha_mean, alpha_hdi, beta_mean, beta_hdi)`` where the means are
        Python scalars and the HDIs are the arrays returned by ``az.hdi``.
    """
    # Give each derived array an explicit name. az.hdi labels its output by the
    # array's name and only falls back to "x" when the array is unnamed, so
    # relying on "x" breaks whenever the inputs happen to share a name.
    alpha = (mu * kappa).rename("alpha")
    beta = ((1 - mu) * kappa).rename("beta")

    # Posterior means over all chains and draws, plus the requested HDIs.
    alpha_mean = alpha.mean(("chain", "draw")).item()
    alpha_hdi = az.hdi(alpha, hdi_prob=hdi_prob)["alpha"].values
    beta_mean = beta.mean(("chain", "draw")).item()
    beta_hdi = az.hdi(beta, hdi_prob=hdi_prob)["beta"].values

    return alpha_mean, alpha_hdi, beta_mean, beta_hdi


# Recover Beta shape-parameter summaries from the intercept-only fit.
# expit is applied to the intercept draws before they are passed as mu
# (presumably undoing a logit link on the mean — confirm against the model's
# family). Note that this rebinds the notebook-level names alpha and beta.
alpha, alpha_hdi, beta, beta_hdi = mukappa_to_alphabeta(
    expit(fitted.posterior["Intercept"]), fitted.posterior["kappa"]
)

# Print the rounded posterior means and the rounded lower/upper HDI bounds.
print(
    f"Alpha - mean: {np.round(alpha)}, 95% HDI: {np.round(alpha_hdi[0])} - {np.round(alpha_hdi[1])}"
)
print(
    f"Beta - mean: {np.round(beta)}, 95% HDI: {np.round(beta_hdi[0])} - {np.round(beta_hdi[1])}"
)

In [None]:
# Shift applied to the Beta alpha parameter per micron of dirt.
effect_per_micron = 5.0

# Reference case: a clean coin with symmetric Beta(1000, 1000) probabilities.
alpha, beta = 1_000, 1_000
p = np.random.beta(a=alpha, b=beta, size=10_000)

# 50 microns of dirt raising alpha: heads become more likely.
p_heads = np.random.beta(a=alpha + 50 * effect_per_micron, b=beta, size=10_000)
# 50 microns of dirt lowering alpha: tails become more likely.
p_tails = np.random.beta(a=alpha - 50 * effect_per_micron, b=beta, size=10_000)

# Overlay the three densities, then add 25% headroom above the curves.
az.plot_kde(p, label="Clean Coin")
az.plot_kde(p_heads, label="Biased toward heads", plot_kwargs=dict(color="C1"))
az.plot_kde(p_tails, label="Biased toward tails", plot_kwargs=dict(color="C2"))
plt.xlabel("$p$")
plt.ylim(top=1.25 * plt.ylim()[1]);

In [None]:
# Dirt thickness on each face of 1,000 coins, drawn from a half-normal
# distribution with scale 25 (heads-biasing side first, then tails-biasing).
heads_bias_dirt = stats.halfnorm.rvs(loc=0, scale=25, size=1_000)
tails_bias_dirt = stats.halfnorm.rvs(loc=0, scale=25, size=1_000)

# Per-coin Beta parameters: beta stays at 1,000 while alpha is moved up by
# heads-biasing dirt and down by tails-biasing dirt.
beta = np.full(1_000, 1_000)
alpha = np.full(1_000, 1_000)
alpha = (
    alpha + effect_per_micron * heads_bias_dirt - effect_per_micron * tails_bias_dirt
)

# One heads-probability per coin.
p = np.random.beta(a=alpha, b=beta)

# The frame stores the dirt amounts rounded to whole numbers.
df = pd.DataFrame(
    data={
        "p": p,
        "heads_bias_dirt": np.round(heads_bias_dirt),
        "tails_bias_dirt": np.round(tails_bias_dirt),
    }
)
df.head()

In [None]:
# Three side-by-side panels: density of p, and histograms of the two dirt
# measurements.
fig, ax = plt.subplots(1, 3, figsize=(16, 5))

df["p"].plot.kde(ax=ax[0])
ax[0].set_xlabel("$p$")

# Unit-width bins whose edges run through max + 1. np.arange excludes its stop
# value, so stopping at max would leave the largest observations outside the
# bin range, where the histogram silently ignores them.
df["heads_bias_dirt"].plot.hist(
    ax=ax[1], bins=np.arange(0, df["heads_bias_dirt"].max() + 2)
)
# Raw strings: "\m" is not a valid escape sequence in a normal string literal.
ax[1].set_xlabel(r"Measured Dirt Biasing Toward Heads ($\mu m$)")
df["tails_bias_dirt"].plot.hist(
    ax=ax[2], bins=np.arange(0, df["tails_bias_dirt"].max() + 2)
)
ax[2].set_xlabel(r"Measured Dirt Biasing Toward Tails ($\mu m$)");

In [None]:
# Net dirt difference (heads-biasing minus tails-biasing) as the single predictor.
df["delta_d"] = df["heads_bias_dirt"].sub(df["tails_bias_dirt"])

# Beta regression of p on the dirt difference, followed by a predict() call
# with kind="response" on the fitted result.
dirt_model = bmb.Model(formula="p ~ delta_d", data=df, family="beta")
dirt_fitted = dirt_model.fit()
dirt_model.predict(dirt_fitted, kind="response")

In [None]:
# Tabular posterior summary of the dirt model.
az.summary(dirt_fitted)

In [None]:
# Posterior predictive check for the dirt model (semicolon suppresses the
# echoed return value).
az.plot_ppc(dirt_fitted);

In [None]:
# Posterior mean of the delta_d coefficient and its 95% HDI, each passed
# through expit before being printed.
mean_effect = expit(dirt_fitted.posterior["delta_d"].mean())
hdi = az.hdi(dirt_fitted.posterior["delta_d"], hdi_prob=0.95)
lower, upper = (expit(hdi["delta_d"][bound]) for bound in (0, 1))
print(f"Mean effect: {mean_effect.item():.4f}")
print(f"95% interval {lower.item():.4f} - {upper.item():.4f}")

In [None]:
# Load the "batting" example dataset through bambi's data loader.
batting = bmb.load_data("batting")

In [None]:
# Batting average = hits / at-bats.
batting["batting_avg"] = batting["H"].div(batting["AB"])
# Keep only rows with more than 100 at-bats.
batting = batting.loc[batting["AB"].gt(100)]
# Analysis window: yearID strictly between 1990 and 2018.
df = batting.loc[batting["yearID"].gt(1990) & batting["yearID"].lt(2018)]

In [None]:
# 30-bin histogram of the batting averages in the analysis window.
df["batting_avg"].hist(bins=30)
plt.ylabel("Count")
plt.xlabel("Batting Average");

In [None]:
# Intercept-only beta regression on batting average.
model_avg = bmb.Model(formula="batting_avg ~ 1", data=df, family="beta")
avg_fitted = model_avg.fit()

In [None]:
# Tabular posterior summary of the intercept-only batting model.
az.summary(avg_fitted)

In [None]:
# Run predict() with kind="response" on the intercept-only batting fit.
# NOTE(review): the posterior predictive plot that follows reads from
# avg_fitted, so predict() appears to update avg_fitted in place; the value
# bound to posterior_predictive may therefore be None — confirm.
posterior_predictive = model_avg.predict(avg_fitted, kind="response")

In [None]:
# Posterior predictive check for the intercept-only batting model.
az.plot_ppc(avg_fitted);

In [None]:
# Add each row's preceding batting average for the same player.
# A row gets the previous row's batting_avg only when that row has the same
# playerID; otherwise it gets NaN.
# NOTE(review): this relies on rows being ordered by player and then by season,
# and the "previous" row is the previous row that survived the AB > 100 filter,
# which is not necessarily the immediately preceding calendar year — confirm.
#
# assign() builds a new frame instead of setting a column on `batting`, which
# is itself a filtered selection of the loaded data; writing into it directly
# triggers pandas' SettingWithCopyWarning.
batting = batting.assign(
    batting_avg_shift=np.where(
        batting["playerID"] == batting["playerID"].shift(),
        batting["batting_avg"].shift(),
        np.nan,
    )
)

# Same year window as before, keeping only rows that have a lagged value.
df_shift = batting[(batting["yearID"] > 1990) & (batting["yearID"] < 2018)]
df_shift = df_shift[~df_shift["batting_avg_shift"].isna()]

# Correlation between the previous and current batting averages.
df_shift[["batting_avg_shift", "batting_avg"]].corr()

In [None]:
# Refit the intercept-only model on the lagged subset, requesting the
# log-likelihood in the returned inference data.
model_avg = bmb.Model(formula="batting_avg ~ 1", data=df_shift, family="beta")
avg_fitted = model_avg.fit(idata_kwargs=dict(log_likelihood=True))

# Model with the previous batting average as predictor, fitted the same way.
model_lag = bmb.Model(
    formula="batting_avg ~ batting_avg_shift", data=df_shift, family="beta"
)
lag_fitted = model_lag.fit(idata_kwargs=dict(log_likelihood=True))

In [None]:
# Tabular posterior summary of the lagged-average model.
az.summary(lag_fitted)

In [None]:
# Compare the intercept-only and lagged-average fits with az.compare; the dict
# keys are the labels used for the two models.
az.compare({"intercept-only": avg_fitted, "lag-model": lag_fitted})

In [None]:
# Run predict() with kind="response" on the lagged-average fit, then draw its
# posterior predictive check.
# NOTE(review): plot_ppc reads from lag_fitted, so predict() appears to update
# lag_fitted in place; the value bound to ppc may be None — confirm.
ppc = model_lag.predict(lag_fitted, kind="response")
az.plot_ppc(lag_fitted);

In [None]:
# Posterior mean of the batting_avg_shift coefficient, kept on its original
# scale, and the coefficient's 95% HDI.
mean_effect = lag_fitted.posterior["batting_avg_shift"].mean().item()
hdi = az.hdi(lag_fitted.posterior["batting_avg_shift"], hdi_prob=0.95)

# Both HDI bounds are passed through expit; the mean is transformed at print time.
lower, upper = (expit(hdi["batting_avg_shift"][bound]).item() for bound in (0, 1))
print(f"Mean effect: {expit(mean_effect):.4f}")
print(f"95% interval {lower:.4f} - {upper:.4f}")

In [None]:
# HDI bands of the posterior predictive batting average against the previous
# batting average: the 95% band first, then the 68% band on top of it.
for _prob, _color in ((0.95, "goldenrod"), (0.68, "forestgreen")):
    az.plot_hdi(
        df_shift["batting_avg_shift"],
        lag_fitted.posterior_predictive["batting_avg"],
        hdi_prob=_prob,
        color=_color,
        fill_kwargs=dict(alpha=0.8),
    )

# Mean curve: posterior-mean intercept and slope evaluated on a 100-point grid
# over the observed range of the predictor, mapped through expit.
intercept = lag_fitted.posterior["Intercept"].values.mean()
x = np.linspace(
    start=df_shift["batting_avg_shift"].min(),
    stop=df_shift["batting_avg_shift"].max(),
    num=100,
)
linear = mean_effect * x + intercept
plt.plot(x, expit(linear), c="black")
plt.xlabel("Previous Year's Batting Average")
plt.ylabel("Batting Average");