# <font color="#49699E" size=40>Bayesian Hierarchical Regression Modelling</font>
# LEARNING OBJECTIVES
# LEARNING MATERIALS
# INTRODUCTION
## Imports

In [ ]:
# Data handling and numerics.
import pandas as pd
# Turn off pandas' HTML notebook repr so DataFrames display as plain text.
pd.set_option("display.notebook_repr_html", False)
import numpy as np
import seaborn as sns
# Probabilistic modelling (PyMC3) and posterior diagnostics/plots (ArviZ).
import pymc3 as pm
import arviz as az

import matplotlib as mpl
from matplotlib import pyplot as plt
# Project-local helper, called once at import time.
# NOTE(review): presumably applies the project's seaborn theme -- confirm in dcss.plotting.
from dcss.plotting import custom_seaborn
custom_seaborn()

# Project-local figure helpers used by the no-pooling and partial-pooling cells below.
from dcss.bayes import plot_2020_no_pool, plot_2020_partial_pool

# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation and sampler warnings from the libraries above.
import warnings
warnings.filterwarnings('ignore')

# SO, WHAT'S A HIERARCHICAL MODEL?


# Goldilocks and the Three Pools


## Load Data


In [ ]:
# Read the combined 2020 election data (path is relative to the notebook).
df = pd.read_csv('../data/2020_election/2020_districts_combined.csv')


def _z_score(column):
    """Return `column` centred on its mean and divided by its standard deviation."""
    return (column - np.mean(column)) / np.std(column)


# Continuous variables, standardized so they share a common scale.
spend_std = _z_score(df.spend)
vote_std = _z_score(df.vote)
pvi_std = _z_score(df.pvi)

# Encode each row's state as an integer code; the codes are used further down
# to index per-state parameter vectors of length `n_states`.
state_cat = pd.Categorical(df.state)
state_idx = state_cat.codes
n_states = len(set(state_idx))

# Incumbency columns are used as-is (not standardized).
dem_inc = df.dem_inc
rep_inc = df.rep_inc

In [ ]:
# Preview the first five rows (the default for `head`) as a quick sanity check.
df.head(n=5)

In [ ]:
# Summary statistics for the two incumbency columns and PVI.
df.loc[:, ['dem_inc', 'rep_inc', 'pvi']].describe()

In [ ]:
# Take the integer state code of the row at position 3, then print the code
# followed by the category label that the code stands for.
district_3_state = state_idx[3]
print(district_3_state, state_cat.categories[district_3_state], sep="\n")

## No Pooling Model

In [ ]:
with pm.Model() as no_pool_model:

    # Priors: one intercept and one slope per state, each drawn from a fixed
    # prior. There is no shared hyperprior, so the per-state parameters are
    # not tied to one another.
    alpha = pm.Normal("alpha", mu=0, sigma=2, shape=n_states)
    beta = pm.Normal("beta", mu=1, sigma=2, shape=n_states)
    sigma = pm.Exponential("sigma", 2)

    # Linear model: indexing by `state_idx` gives every row the intercept and
    # slope belonging to that row's state.
    mu = alpha[state_idx] + beta[state_idx] * spend_std

    # Likelihood
    votes = pm.Normal("votes", mu=mu, sigma=sigma, observed=vote_std)

    # Run sample traces. Seeded (matching the other `pm.sample` calls in this
    # notebook) so that the draws are reproducible from run to run.
    trace_no_pool = pm.sample(random_seed=42)

In [ ]:
with no_pool_model:
    # Trace plots for the no-pooling parameters; `compact=True` overlays the
    # per-state components of each vector-valued variable in a single panel.
    az.plot_trace(
        trace_no_pool,
        var_names=['alpha', 'beta', 'sigma'],
        compact=True,
    )

In [ ]:
with no_pool_model:
    # Posterior-predictive draws for the outcome, returned together with the
    # listed model parameters.
    ppc = pm.sample_posterior_predictive(
        trace_no_pool,
        var_names=['votes', 'alpha', 'beta', 'sigma'],
    )

In [ ]:
# Draw the no-pooling figure with the project helper from `dcss.bayes`.
# Arguments are positional; the helper's signature is not visible in this
# notebook, so their order must be kept exactly as written.
plot_2020_no_pool(
    no_pool_model, 
    trace_no_pool,
    n_states, 
    state_idx,
    spend_std,
    vote_std,
    ppc,
    state_cat
)

### Partially Pooled Model


In [ ]:
with pm.Model() as partial_pool_model:

    # Hyperpriors: location and scale of the distributions that the per-state
    # intercepts and slopes are drawn from.
    alpha_mu = pm.Normal("alpha_mu", mu=1, sigma=2)
    beta_mu = pm.Normal("beta_mu", mu=1, sigma=2)
    alpha_sigma = pm.Exponential("alpha_sigma", lam=1)
    beta_sigma = pm.Exponential("beta_sigma", lam=1)

    # Priors: per-state intercepts and slopes that share the hyperpriors above.
    alpha = pm.Normal(
        "alpha", mu=alpha_mu, sigma=alpha_sigma, shape=n_states
    )
    beta = pm.Normal(
        "beta", mu=beta_mu, sigma=beta_sigma, shape=n_states
    )
    sigma = pm.Exponential("sigma", lam=2)

    # Linear model: each row uses its own state's intercept and slope.
    mu = alpha[state_idx] + beta[state_idx] * spend_std

    # Likelihood
    votes = pm.Normal("votes", mu=mu, sigma=sigma, observed=vote_std)

In [ ]:
with partial_pool_model:
    # Sample the partial-pooling model with a fixed seed for reproducibility.
    trace_partial_pool = pm.sample(
        random_seed=42
    )

#### The Peril is in the Priors


In [ ]:
with partial_pool_model:
    # Trace plots for the hyperparameters and the observation noise only.
    az.plot_trace(
        trace_partial_pool,
        var_names=[
            'alpha_mu',
            'beta_mu',
            'alpha_sigma',
            'beta_sigma',
            'sigma',
        ],
        compact=True,
    )

## Informative Priors: A Spoonful of Information Makes the Sampler Calm Down


In [ ]:
with pm.Model() as partial_pool_model_regularized:

    # Hyperpriors: narrower Normal locations and Gamma scales than in
    # `partial_pool_model`.
    alpha_mu = pm.Normal("alpha_mu", mu=0.1, sigma=0.3)
    beta_mu = pm.Normal("beta_mu", mu=0.1, sigma=0.3)
    alpha_sigma = pm.Gamma("alpha_sigma", alpha=4, beta=0.1)
    beta_sigma = pm.Gamma("beta_sigma", alpha=4, beta=0.1)

    # Priors: per-state intercepts and slopes drawn from the hyperpriors.
    alpha = pm.Normal(
        "alpha", mu=alpha_mu, sigma=alpha_sigma, shape=n_states
    )
    beta = pm.Normal(
        "beta", mu=beta_mu, sigma=beta_sigma, shape=n_states
    )
    sigma = pm.Gamma("sigma", alpha=4, beta=0.1)

    # Linear model, registered as a Deterministic so that "mu" is stored in
    # the trace and can be requested by name later.
    mu = pm.Deterministic(
        "mu",
        alpha[state_idx] + beta[state_idx] * spend_std,
    )

    # Likelihood
    votes = pm.Normal("votes", mu=mu, sigma=sigma, observed=vote_std)

    # Run sample traces with a fixed seed.
    trace_partial_pool_regularized = pm.sample(random_seed=42)

In [ ]:
with partial_pool_model_regularized:
    # Trace plots for the hyperparameters and the observation noise only.
    az.plot_trace(
        trace_partial_pool_regularized,
        var_names=[
            'alpha_mu',
            'beta_mu',
            'alpha_sigma',
            'beta_sigma',
            'sigma',
        ],
        compact=True,
    )

In [ ]:
with partial_pool_model_regularized:
    # Posterior-predictive draws for the outcome plus every listed parameter,
    # including the deterministic linear predictor "mu".
    ppc = pm.sample_posterior_predictive(
        trace_partial_pool_regularized,
        var_names=[
            'votes',
            'alpha_mu',
            'beta_mu',
            'alpha_sigma',
            'beta_sigma',
            'alpha',
            'beta',
            'sigma',
            'mu',
        ],
    )

In [ ]:
with partial_pool_model_regularized:
    # Tabulate the hyperparameters and noise term, rounded to two decimals.
    summary = az.summary(
        trace_partial_pool_regularized,
        var_names=[
            'alpha_mu',
            'beta_mu',
            'alpha_sigma',
            'beta_sigma',
            'sigma',
        ],
        round_to=2,
    )

# Display only the posterior mean, standard deviation and R-hat columns.
summary.loc[:, ['mean', 'sd', 'r_hat']]

In [ ]:
# Draw the partial-pooling figure with the project helper from `dcss.bayes`.
# Both the regularized partial-pooling trace and the earlier no-pooling trace
# are handed over. Arguments are positional; the helper's signature is not
# visible in this notebook, so their order must be kept exactly as written.
plot_2020_partial_pool(
    partial_pool_model_regularized,
    trace_partial_pool_regularized,
    trace_no_pool,
    n_states, 
    state_idx,
    spend_std,
    vote_std,
    ppc,
    state_cat
)

## Shrinkage


## Does the Model Fit? Posterior Predictive Plots 


In [ ]:
# Posterior-predictive check: predicted versus observed standardized vote.

# 89% highest-density interval of the linear predictor, per observation.
# ArviZ currently reads a 2-D array as (draw, shape) but warns that this will
# change to (chain, draw). Adding an explicit leading chain axis pins the
# (chain, draw, shape) reading and gives the same result as before.
# Assumes ppc["mu"] has draws on axis 0, as the `.mean(axis=0)` below does.
mu_hpd = az.hdi(ppc["mu"][np.newaxis, ...], hdi_prob=0.89)

# Posterior-predictive mean per observation; computed once and reused.
D_sim = ppc["votes"].mean(axis=0)

fig, ax = plt.subplots(figsize=(6, 6))

# `yerr` wants non-negative (lower, upper) distances, one row each, hence the
# transpose of the HDI bounds and the absolute value.
ax.errorbar(
    vote_std,
    D_sim,
    yerr=np.abs(D_sim - mu_hpd.T),
    fmt="C0o",
)

# Pass the data as keywords: positional `x`/`y` are deprecated in seaborn 0.11
# and rejected from 0.12 onwards. The target axes are named explicitly too.
sns.scatterplot(x=vote_std, y=D_sim, s=1, color='darkgray', ax=ax)

# Dashed identity line: points on it are predicted perfectly.
min_x, max_x = vote_std.min(), vote_std.max()
ax.plot([min_x, max_x], [min_x, max_x], "k--")

ax.set_ylabel("Predicted vote differential")
ax.set_xlabel("Observed votes differential")

sns.despine()
plt.show()

# THE BEST MODEL OUR DATA CAN BUY


In [ ]:
with pm.Model() as full_hierarchical_model:

    # Hyperpriors for the per-state intercepts and per-state spending slopes.
    alpha_mu_state = pm.Normal("alpha_mu_state", mu=0.1, sigma=0.3)
    alpha_sigma_state = pm.Gamma("alpha_sigma_state", alpha=4, beta=0.1)
    beta_mu_spend = pm.Normal("beta_mu_spend", mu=0.1, sigma=0.3)
    beta_sigma_spend = pm.Gamma("beta_sigma_spend", alpha=4, beta=0.1)

    # Per-state parameters drawn from the hyperpriors above.
    alpha_state = pm.Normal(
        "alpha_state",
        mu=alpha_mu_state,
        sigma=alpha_sigma_state,
        shape=n_states,
    )
    beta_spend = pm.Normal(
        "beta_spend",
        mu=beta_mu_spend,
        sigma=beta_sigma_spend,
        shape=n_states,
    )

    # Coefficients shared by all states, plus the observation noise.
    beta_pvi = pm.Normal("beta_pvi", mu=1, sigma=0.3)
    beta_rep_inc = pm.Normal("beta_rep_inc", mu=-0.5, sigma=0.2)
    beta_dem_inc = pm.Normal("beta_dem_inc", mu=0.5, sigma=0.2)
    sigma = pm.Gamma("sigma", alpha=4, beta=0.1)

    # Linear model: state-specific intercept and spending slope, plus the
    # shared PVI and incumbency effects. Stored in the trace under "mu".
    mu = pm.Deterministic(
        "mu",
        alpha_state[state_idx]
        + beta_spend[state_idx] * spend_std
        + beta_pvi * pvi_std
        + beta_rep_inc * rep_inc
        + beta_dem_inc * dem_inc,
    )

    # Likelihood
    votes = pm.Normal("votes", mu=mu, sigma=sigma, observed=vote_std)

    # Run sample traces with a raised acceptance target and a fixed seed.
    trace_full_hierarchical_model = pm.sample(target_accept=0.97, random_seed=42)

In [ ]:
with full_hierarchical_model:
    # Trace plots for the hyperparameters, shared coefficients and noise term
    # (the per-state vectors are left out).
    az.plot_trace(
        trace_full_hierarchical_model,
        var_names=[
            'alpha_mu_state',
            'alpha_sigma_state',
            'beta_mu_spend',
            'beta_sigma_spend',
            'beta_pvi',
            'beta_rep_inc',
            'beta_dem_inc',
            'sigma',
        ],
        compact=True,
    )

In [ ]:
# Posterior-predictive check for the full hierarchical model.
with full_hierarchical_model:
    ppc = pm.sample_posterior_predictive(
        trace_full_hierarchical_model,
        var_names=['votes', 'mu'],
    )

# 89% highest-density interval of the linear predictor, per observation.
# ArviZ currently reads a 2-D array as (draw, shape) but warns that this will
# change to (chain, draw). Adding an explicit leading chain axis pins the
# (chain, draw, shape) reading and gives the same result as before.
# Assumes ppc["mu"] has draws on axis 0, as the `.mean(axis=0)` below does.
mu_hpd = az.hdi(ppc["mu"][np.newaxis, ...], hdi_prob=0.89)

# Posterior-predictive mean per observation; computed once and reused.
D_sim = ppc["votes"].mean(axis=0)

fig, ax = plt.subplots(figsize=(6, 6))

# `yerr` wants non-negative (lower, upper) distances, one row each, hence the
# transpose of the HDI bounds and the absolute value.
ax.errorbar(
    vote_std,
    D_sim,
    yerr=np.abs(D_sim - mu_hpd.T),
    fmt="C0o",
)

# Pass the data as keywords: positional `x`/`y` are deprecated in seaborn 0.11
# and rejected from 0.12 onwards. The target axes are named explicitly too.
sns.scatterplot(x=vote_std, y=D_sim, s=1, color='darkgray', ax=ax)

# Dashed identity line: points on it are predicted perfectly.
min_x, max_x = vote_std.min(), vote_std.max()
ax.plot([min_x, max_x], [min_x, max_x], "k--")

ax.set_ylabel("Predicted vote differential")
ax.set_xlabel("Observed votes differential")

sns.despine()

plt.show()

In [ ]:
# Forest plot of the shared coefficients and the per-state spending slopes.
sns.set_style('whitegrid')
ax = az.plot_forest(
    trace_full_hierarchical_model,
    var_names=['beta_pvi', 'beta_dem_inc', 'beta_rep_inc', 'beta_spend'],
    combined=True,
    quartiles=False,
)

# Tick labels: the state names in reverse order first, followed by the three
# shared coefficients.
labels = np.array(
    list(state_cat.categories)[::-1]
    + ['Republican Incumbency', 'Democratic Incumbency', 'PVI']
)

# Assigning to `_` keeps the return values out of the notebook output.
_ = ax[0].set_yticklabels(labels)
_ = ax[0].set_title(
    "coefficients for spending differentials, incumbency, and PVI"
)

# Keep all four spines visible on this figure.
sns.despine(left=False, bottom=False, top=False, right=False)
plt.show()

# THE FAULT IN OUR (LACK OF) STARS


# CONCLUSION
## Key Points 
