In [None]:
from scikit_stan import GLM

import numpy as np
import pandas as pd  # type: ignore

import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy.special import expit

from sklearn.model_selection import GridSearchCV

from pathlib import Path

In [None]:
# Plot styling: draw all four axes spines, show tick marks and labels on the
# bottom and left axes, then apply the "ggplot" style sheet.
mpl.rc("axes.spines", top=True, bottom=True, left=True, right=True)
mpl.rc("xtick", bottom=True, labelbottom=True)
mpl.rc("ytick", left=True, labelleft=True)
mpl.style.use("ggplot")


# Center rendered images in the notebook output. HTML is taken from the public
# `IPython.display` namespace instead of the internal `IPython.core.display`.
from IPython.display import HTML
HTML("""
<style>
.jp-RenderedImage, .output_png{
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style>
""")


This package integrates with scikit-learn's model-selection tools to optimize hyperparameters. In particular, the space of distributions and distribution parameters, which serve as the hyperparameters of the Bayesian model, can be searched with grid search. What's more, this optimization can be performed at a higher meta level by working with hyperpriors and tuning hyperhyperparameters. We give an example of using scikit-learn's GridSearchCV to perform hyperparameter optimization and demonstrate how the package integrates with the rest of the scikit-learn ecosystem.

Keeping the priors on the regression coefficients at the default autoscaled normal, we perform a grid search over 
1. The error scale of the intercept's prior distribution, represented by $\sigma_\alpha$,     
2. The error scale for the family of $y$, represented by $\sigma_y$.



NOTE: In principle, this could be treated as a two-level hyperparameter optimization problem: an optimization over the hyperparameter $\sigma_\alpha$, and then an optimization over the parameter of the distribution placed on $\sigma_\alpha$. For simplicity, we only perform a single-level search here.

In [None]:
# Load the SRRS2 radon survey and keep only the Minnesota rows.
# Based on: https://mc-stan.org/users/documentation/case-studies/radon.html
srrs2_path = Path.cwd() / "data" / "srrs2.dat"
srrs2 = pd.read_csv(srrs2_path)

# Strip surrounding whitespace from the column names so they can be used as
# plain attribute/keys below.
srrs2.columns = [name.strip() for name in srrs2.columns]

# Build the `fips` key from the state and county codes, restricted to state "MN".
is_minnesota = srrs2["state"] == "MN"
srrs_mn = srrs2.loc[is_minnesota].assign(
    fips=lambda df: df["stfips"] * 1000 + df["cntyfips"]
)

In [None]:
# Load the county-level table, keep the Minnesota rows, and add the `fips`
# key (1000 * stfips + ctfips) that the radon measurements are merged on.
cty = pd.read_csv(Path.cwd() / "data" / "cty.dat")
cty_mn = cty.loc[cty["st"] == "MN"].assign(
    fips=lambda df: 1000 * df["stfips"] + df["ctfips"]
)

In [None]:
# Attach the county uranium reading (`Uppm`) to every measurement via `fips`,
# then keep a single row per measurement id.
# NOTE(review): this cell rebinds `srrs_mn`, so it is meant to be run once per
# kernel session.
srrs_mn = (
    srrs_mn
    .merge(cty_mn[["fips", "Uppm"]], on="fips")
    .drop_duplicates(subset="idnum")
)

# Log uranium level per measurement and the number of measurements.
u = np.log(srrs_mn["Uppm"])

n = len(srrs_mn)

In [None]:
# Strip whitespace from the county names, then collect the distinct counties
# and count them.
srrs_mn["county"] = srrs_mn["county"].str.strip()
mn_counties = srrs_mn["county"].unique()
counties = len(mn_counties)

In [None]:
# Encode each county name as its position in `mn_counties`.
county_lookup = {name: code for code, name in enumerate(mn_counties)}

# `Series.map` performs a direct dict lookup and yields integer codes.
# `Series.replace` with a dict of str -> int relies on implicit object -> int
# downcasting, which recent pandas versions deprecate.
county = srrs_mn["county_code"] = srrs_mn["county"].map(county_lookup).values

radon = srrs_mn["activity"]
# Log-transform the radon activity; the 0.1 offset keeps log() finite should a
# reading be zero.
srrs_mn["log_radon"] = log_radon = np.log(radon + 0.1).values
# Predictor passed to the GLM fits: the `floor` column as a plain array.
floor_measure = srrs_mn["floor"].values

In [None]:
# Histogram of log(activity + 0.1), computed with a vectorized np.log instead
# of a per-row lambda, and labelled so the figure can be read on its own.
ax = np.log(srrs_mn["activity"] + 0.1).hist(bins=25)
ax.set(
    xlabel="log(activity + 0.1)",
    ylabel="Number of measurements",
    title="Distribution of log radon activity",
);

In [None]:
# Baseline model: Gaussian GLM with identity link, log radon regressed on the
# floor indicator.
glm = GLM(family="gaussian", link="identity")
fitted = glm.fit(floor_measure, log_radon)

# Observed data.
plt.scatter(srrs_mn["floor"], np.log(srrs_mn["activity"] + 0.1))

# Overlay the fitted line alpha_ + beta_ * x over a range slightly wider than
# the plotted floor values.
xvals = np.linspace(-0.2, 1.2)
fitted_line = fitted.alpha_ + fitted.beta_ * xvals
plt.plot(xvals, fitted_line, "r--")


##### Varying Slope Model 

In [None]:
%%capture
# this actually is a hyperhyperparameter optimization problem
# optimize over prior_aux_param, which is a hyperhyperparameter 
# as it is the hyperparameter for the prior for sigma, which is used 
# as a parameter in the family distribution   
glm_opt = GLM(family="gaussian", link="identity")

param_range = np.linspace(0.1, 1, 5)

param_grid = {
        "prior_aux" :
            list({
                "prior_aux_dist": "exponential", 

                "prior_aux_param": aux_val
            }
            for aux_val in param_range)
}


grid_search = GridSearchCV(glm, param_grid=param_grid);

searched = grid_search.fit(floor_measure, log_radon);


print(grid_search.cv_results_)

#fitted = glm.fit(X=floor_measure, y=log_radon);

#fitted.alpha_, fitted.beta_, fitted.sigma_

In [None]:
%%capture
# this actually is a hyperhyperparameter optimization problem
# optimize over prior_aux_param, which is a hyperhyperparameter 
# as it is the hyperparameter for the prior for sigma, which is used 
# as a parameter in the family distribution   
glm_opt2 = GLM(family="gaussian", link="identity")

param_range = np.linspace(0.1, 0.5, 5)

param_grid = {
        "prior_intercept" :
            list({
                "prior_intercept_dist": "normal", 

                "prior_intercept_mu": 1.5,

                "prior_intercept_sigma": sigma_alpha_val
            }
            for sigma_alpha_val in param_range)
}


grid_search2 = GridSearchCV(glm, param_grid=param_grid);

searched = grid_search2.fit(floor_measure, log_radon);


print(grid_search.cv_results_)

#fitted = glm.fit(X=floor_measure, y=log_radon);

#fitted.alpha_, fitted.beta_, fitted.sigma_

In [None]:
# Cross-validation results of the intercept-prior search. `gs_rez2` stays the
# raw dict (it is indexed by key when plotting); it is rendered as a DataFrame
# so the per-candidate scores read as a table instead of a dump of arrays.
gs_rez2 = grid_search2.cv_results_
pd.DataFrame(gs_rez2)


In [None]:
# Cross-validation results of the `prior_aux` search. `gs_rez` stays the raw
# dict; it is rendered as a DataFrame so the per-candidate scores read as a
# table instead of a dump of arrays.
gs_rez = grid_search.cv_results_
pd.DataFrame(gs_rez)


In [None]:
# Mean cross-validated score for each candidate sigma_alpha.
# The x values are read from the candidates recorded in cv_results_, so every
# score is paired with the parameter that produced it. cv_results_ lists the
# candidates in grid order, so the scores must not be reversed.
sigma_alpha_vals = [
    params["prior_intercept"]["prior_intercept_sigma"]
    for params in gs_rez2["params"]
]
mean_y = gs_rez2["mean_test_score"]

plt.plot(sigma_alpha_vals, mean_y)
plt.ylabel("Mean Test Score")
# Raw strings keep the LaTeX backslashes literal (avoids the invalid "\s" escape).
plt.xlabel(r"Prior Intercept $\sigma_\alpha$ Parameter")
plt.title(r"Score with Gridsearch on Prior Intercept $\sigma_\alpha$ Parameter");

In [None]:
# Refit the baseline GLM on the floor indicator and log radon values.
glm.fit(X=floor_measure, y=log_radon)

In [None]:
# Show the fitted alpha_ (intercept), beta_ (slope) and sigma_ attributes.
(glm.alpha_, glm.beta_, glm.sigma_)

In [None]:
# ArviZ is used for the trace plots of the fitted samples.
import arviz as az
# Apply ArviZ's "arviz-darkgrid" style sheet.
az.style.use("arviz-darkgrid")

In [None]:
# Convert the CmdStanPy samples held by the fitted GLM into ArviZ's data
# structure, then draw trace plots for alpha, beta and sigma.
t = az.from_cmdstanpy(posterior=fitted.fitted_samples_)
traced_vars = ["alpha", "beta", "sigma"]
az.plot_trace(t, var_names=traced_vars)