# <font color="#49699E" size=40>Bayesian Regression Models with Probabilistic Programming</font>
# LEARNING OBJECTIVES
# LEARNING MATERIALS
# INTRODUCTION
## Imports

In [ ]:
import pandas as pd
# Show DataFrames with their plain-text repr rather than HTML tables in notebook output.
pd.set_option("display.notebook_repr_html", False)
import numpy as np
import seaborn as sns
import pymc3 as pm
import arviz as az

import matplotlib as mpl
from matplotlib import pyplot as plt

from dcss.plotting import custom_seaborn
# Project helper; presumably applies the course's plot styling — confirm in dcss.plotting.
custom_seaborn()

from dcss.bayes import plot_2020_election_diff, plot_2020_election_fit

import warnings
# NOTE(review): this suppresses every Python warning for the rest of the session,
# deprecation warnings included; narrow the filter if any of them should stay visible.
warnings.filterwarnings('ignore')

## Data


In [ ]:
# Load the combined 2020 election CSV (path is relative to the notebook's working directory).
df = pd.read_csv('../data/2020_election/2020_districts_combined.csv')
# Preview the first rows to confirm the file was parsed as expected.
df.head()

## Checking and Cleaning the Data


In [ ]:
# Print floats with two decimal places in every DataFrame shown from here on.
pd.set_option("display.float_format", "{:.2f}".format)

# Summary statistics for the three numeric columns of interest.
df.loc[:, ['vote', 'spend', 'districts']].describe()

In [ ]:
# Project plotting helper from dcss.bayes; presumably visualizes the vote and spending
# differentials in df — confirm against its definition.
plot_2020_election_diff(df)

### Standardize Data, Process Categoricals


In [ ]:
def _standardize(values):
    """Return *values* as z-scores: centered on the mean, scaled by the standard deviation.

    Uses ``np.std`` with its default ``ddof=0``, i.e. the population standard
    deviation, exactly as the inline expressions this helper replaces did.
    """
    return (values - np.mean(values)) / np.std(values)


# Put predictor and outcome on a common, unit-free scale.
spend_std = _standardize(df.spend)
vote_std = _standardize(df.vote)

# Encode state labels as integer codes for use as an index.
state_cat = pd.Categorical(df.state)
state_idx = state_cat.codes
# Count the categories themselves rather than the distinct codes: pandas assigns
# the code -1 to missing values, which would otherwise be counted as an extra state.
n_states = len(state_cat.categories)

# DEVELOPING OUR BAYESIAN MODEL


## Making the Model with PyMC3

### Context Management for Modelling with PyMC3


In [ ]:
# Random variables created inside the `with` block are attached to test_model.
with pm.Model() as test_model:
    testPrior = pm.Normal("testPrior", mu=0, sigma=1)

In [ ]:
# Bare expression: the notebook displays the model object's representation.
test_model

In [ ]:
# Bare expression: the notebook displays the random variable created in test_model.
testPrior

In [ ]:
# Re-entering an existing model's context adds further variables to that same model.
with test_model:
    anotherTest = pm.Normal("anotherTest", mu=2.5, sigma=10)

# Display the model again, now with the second variable added.
test_model

### Specifying the Model in PyMC3


In [ ]:
with pm.Model() as pool_model:
    # Priors for the pooled regression:
    #   alpha - intercept, Normal(mean 1, sd 2)
    #   beta  - slope on standardized spending, Normal(mean 1, sd 2)
    #   sigma - noise scale of the likelihood, Exponential(rate 2), prior mean 1/2
    alpha = pm.Normal("alpha", mu=1, sigma=2)
    beta = pm.Normal("beta", mu=1, sigma=2)
    sigma = pm.Exponential("sigma", lam=2)

In [ ]:
with pool_model:
    # Linear model: expected standardized vote differential as a function of
    # standardized spending. This is a plain expression (not wrapped in
    # pm.Deterministic), so it is not recorded as a named variable of its own.
    mu = alpha + beta * spend_std

In [ ]:
with pool_model:
    # Likelihood: passing `observed=` ties this Normal to the data instead of
    # leaving it as a free parameter to be sampled.
    votes = pm.Normal("votes", mu=mu, sigma=sigma, observed=vote_std)

## Prior Predictive Check


In [ ]:
with pool_model:
    # Draw 50 samples from the priors, and of `votes` simulated from them,
    # before the model has been fit to the data. Seeded so the draws are repeatable.
    prior_predictive = pm.sample_prior_predictive(
        samples=50, var_names=['alpha', 'beta', 'sigma', 'votes'], random_seed=42)

# List the variable names available in the returned mapping.
prior_predictive.keys()

In [ ]:
# Inspect the shape of the simulated outcomes; expected to be
# (number of draws, number of observations) — confirm against the output.
prior_predictive['votes'].shape

In [ ]:
# Grid of standardized spending values at which each prior regression line is drawn.
spend_grid = np.linspace(-20, 20, 50)

ax = plt.gca()
ax.set_xlim((-10, 10))
ax.set_ylim((-10, 10))

# One line per prior draw, using the same linear model as the PyMC3 definition above
# with a sampled (intercept, slope) pair.
for intercept, slope in zip(prior_predictive["alpha"], prior_predictive['beta']):
    vote_sim = intercept + slope * spend_grid
    ax.plot(spend_grid, vote_sim, c="k", alpha=0.4)

# Shade the region within two standard deviations of zero on each axis.
ax.axhspan(-2, 2, facecolor='black', alpha=0.2)
ax.axvspan(-2, 2, facecolor='black', alpha=0.2)

ax.set_xlabel("Expenditure differential (standard deviations)")
ax.set_ylabel("Vote differential (standard deviations)")
plt.show()

In [ ]:
# Same regression structure as pool_model, but with narrower priors
# (alpha sd 0.5 instead of 2, beta sd 1 instead of 2) centered at different values.
# NOTE(review): the sampling cells further down still fit pool_model, not this
# model — confirm that is intended.
with pm.Model() as regularized_model:

    # Priors
    alpha = pm.Normal("alpha", mu=0, sigma=0.5)
    beta = pm.Normal("beta", mu=0.5, sigma=1)
    sigma = pm.Exponential("sigma", lam=1)

    # Linear Model
    mu = alpha + beta * spend_std

    # Likelihood
    votes = pm.Normal("votes", mu=mu, sigma=sigma, observed=vote_std)

    # Prior predictive draws with the same sample count and seed as used for pool_model,
    # so the two sets of prior lines can be compared directly.
    reg_prior_pred = pm.sample_prior_predictive(
        samples=50, var_names=['alpha', 'beta', 'sigma', 'votes'], random_seed=42)

In [ ]:
# Grid of standardized spending values at which each prior regression line is drawn.
spend_grid = np.linspace(-20, 20, 50)

axes = plt.gca()
axes.set_xlim((-10, 10))
axes.set_ylim((-10, 10))

# One line per draw from the regularized priors: intercept + slope * spending.
for intercept, slope in zip(reg_prior_pred["alpha"], reg_prior_pred['beta']):
    vote_sim = intercept + slope * spend_grid
    axes.plot(spend_grid, vote_sim, c="k", alpha=0.4)

# Shade the region within two standard deviations of zero on each axis.
axes.axhspan(-2, 2, facecolor='black', alpha=0.2)
axes.axvspan(-2, 2, facecolor='black', alpha=0.2)

axes.set_xlabel("Expenditure differential (standard deviations)")
axes.set_ylabel("Vote differential (standard deviations)")
plt.show()

## Running Our Model


In [ ]:
with pool_model:
    # Draw posterior samples with PyMC3's default sampler settings.
    # Seeded (same seed as the prior predictive draws) so that reruns of the
    # notebook give the same trace and therefore the same summaries and plots.
    trace_pool = pm.sample(random_seed=42)

In [ ]:
# Tabulate posterior summaries for the trace, rounded to two decimals.
with pool_model:
    summary = az.summary(data=trace_pool, round_to=2)

# Keep only the posterior mean, standard deviation and the r_hat convergence diagnostic.
summary.loc[:, ['mean', 'sd', 'r_hat']]

## Checking the Traceplot


In [ ]:
# Trace plots for the three model parameters.
with pool_model:
    az.plot_trace(
        trace_pool,
        var_names=['alpha', 'beta', 'sigma'],
        compact=True,
    )

## Establishing Credible Intervals


In [ ]:
with pool_model:
    # Three stacked panels, one per parameter, sharing both axes so the
    # posteriors can be compared on the same scale.
    fig, axs = plt.subplots(
        nrows=3, ncols=1, sharex=True, sharey=True, figsize=(6, 6)
    )
    # Posterior densities annotated with 89% highest-density intervals.
    az.plot_posterior(
        trace_pool,
        var_names=['alpha', 'beta', 'sigma'],
        hdi_prob=0.89,
        ax=axs,
    )
    fig.tight_layout()

## Posterior Predictive Checks


In [ ]:
with pool_model:
    # Simulate outcomes (and carry along the parameter draws) from the fitted posterior.
    # Seeded, like the prior predictive draws, so the posterior predictive check
    # and the downstream plots are reproducible across reruns.
    ppc = pm.sample_posterior_predictive(
        trace_pool,
        var_names=['votes', 'alpha', 'beta', 'sigma'],
        random_seed=42,
    )

In [ ]:
# Convert the posterior predictive draws into ArviZ's container, using pool_model
# to supply the model information.
ppc_data = az.from_pymc3(posterior_predictive=ppc, model=pool_model)

# Overlay 100 posterior predictive samples, with the legend turned off.
az.plot_ppc(ppc_data, num_pp_samples=100, legend=False)

plt.show()

## Plotting Uncertainty


In [ ]:
# Project plotting helper from dcss.bayes; presumably draws the fitted regression with
# its uncertainty from the trace and posterior predictive draws — confirm against its definition.
plot_2020_election_fit(spend_std, vote_std, trace_pool, ppc)

# CONCLUSION
## Key Points 
