# Radon case study

This notebook closely follows [Bambi's radon example](https://bambinos.github.io/bambi/main/notebooks/radon_example.html#Multilevel-and-hierarchical-models), fitting the same models with `scikit_stan`.

In [None]:
from scikit_stan import GLM
import pandas as pd
import numpy as np
import formulae
import scipy.sparse as sp
import arviz as az

In [None]:
# Load the radon measurements (mn_radon.csv) and the county-level uranium
# table (mn_uranium.csv) from the stan-dev example-models repository.
# Both reads go over the network.
radon = pd.read_csv('https://raw.githubusercontent.com/stan-dev/example-models/master/jupyter/radon/data/mn_radon.csv')
counties = pd.read_csv('https://raw.githubusercontent.com/stan-dev/example-models/master/jupyter/radon/data/mn_uranium.csv')

In [None]:
# Recode the 0/1 floor indicator as readable labels. `replace` (unlike `map`)
# leaves values that are not keys of the dict untouched, so re-running this
# cell keeps the labels instead of turning the whole column into NaN.
radon["floor"] = radon["floor"].replace({0: "Basement", 1: "Floor"})
# Trim surrounding whitespace from the county names.
radon["county"] = radon["county"].str.strip()

In [None]:
# Preview the first rows of `radon`.
radon.head()

## Complete pooling

In [None]:
# Design matrices for log_radon against floor; "0 +" drops the intercept term
# from the formula. The common-effects matrix and the response are converted
# to plain NumPy arrays for the GLM, and `form` is displayed for inspection.
form = formulae.design_matrices("log_radon ~ 0 + floor", radon)
X = np.array(form.common)
y = np.array(form.response)
form

In [None]:
# Complete-pooling model: GLM with default settings apart from save_log_lik.
complete_model = GLM(save_log_lik=True)

In [None]:
# Fit on every column of X except the first.
# NOTE(review): presumably the first column is one floor level and is dropped
# because complete_model fits its own intercept — confirm against `form`.
complete_model.fit(X[:,1:], y)

In [None]:
# Trace plots of the `beta` and `sigma` draws from the complete-pooling fit.
az.plot_trace(
    az.from_cmdstanpy(complete_model.fitted_samples_),
    var_names=['beta', 'sigma'],
    compact=True,
)

## No pooling

In [None]:
# Design matrices for the no-pooling model: county:floor interaction terms
# with no intercept. The common-effects matrix is converted to a dense array
# and then stored in SciPy CSC sparse format; `form` is displayed for inspection.
form = formulae.design_matrices("log_radon ~ 0 + county:floor", radon)
X = sp.csc_matrix(np.array(form.common))
y = np.array(form.response)
form

In [None]:
# Show the sparse matrix summary for X.
X

In [None]:
# No-pooling model: the GLM's own intercept is disabled (fit_intercept=False)
# and save_log_lik is switched on.
unpooled = GLM(fit_intercept=False, save_log_lik=True)

In [None]:
# Fit the no-pooling model on the sparse design matrix.
unpooled.fit(X, y)

In [None]:
# Trace plots of the `beta` and `sigma` draws from the no-pooling fit.
az.plot_trace(
    az.from_cmdstanpy(unpooled.fitted_samples_),
    var_names=['beta', 'sigma'],
    compact=True,
)

In [None]:
# Convert the no-pooling fit to ArviZ InferenceData and use the design-matrix
# column names as the values of the `beta_dim_0` coordinate.
# This relies on `form` still being the no-pooling design built earlier.
infdata = az.from_cmdstanpy(unpooled.fitted_samples_)
infdata.posterior['beta_dim_0'] = form.common.as_dataframe().columns
infdata

In [None]:
# Leave-one-out cross-validation estimate for the no-pooling fit.
az.loo(infdata)

In [None]:
# Forest plot of every `beta` coefficient with chains combined and an r_hat
# panel; the figure is tall (6 x 32) because there is one row per coefficient.
az.plot_forest(
    data=infdata,
    var_names=['beta'],
    figsize=(6, 32),
    r_hat=True,
    combined=True,
    textsize=8,
)

## Partial pooling

Open question: should group-level data be passed to the existing `fit`, or does partial pooling need a new model class? rstanarm handles this case with `stan_glmer`.

In [None]:
# Design matrices for a varying-intercept model: a common intercept plus a
# per-county group-level intercept. Displayed for inspection.
x = formulae.design_matrices('log_radon ~ 1 + (1|county)', radon)
x

In [None]:
# Group-level design matrix as a dense array of indicators:
# a 1 at position (i, j) means observation i belongs to group (county) j.
np.array(x.group)

In [None]:
# Inspect the design for a model with common floor and log_uranium terms plus
# floor effects that vary by county (no intercept in either part).
formulae.design_matrices("log_radon ~ 0 + floor + log_uranium + (0 + floor|county)", radon)

In [None]:
# Model form: y ~ B X + Z C
#
# C is 85 x 1
#
# Open questions: what exactly is C, and which priors should it get?

In [None]:
# Simulated binomial data: 110 observations of `trials` attempts each, with a
# binary covariate x (50 zeros followed by 60 ones).
# A seeded generator keeps the draws identical across Restart & Run All.
rng = np.random.default_rng(0)
trials = 13
# Success counts are drawn from 3..9 inclusive (upper bound 10 is exclusive).
# The misspelled name is kept because a later cell refers to it.
sucesses = rng.integers(3, 10, size=110)
failures = trials - sucesses
df = pd.DataFrame(
    {
        "y": sucesses,
        "ybar": failures,
        "x": np.concatenate([np.zeros(50), np.ones(60)]),
    }
)


In [None]:
def stack(x1, x2):
    """Vertically stack x1 on top of x2 with np.vstack.

    For two 1-D inputs of equal length the result is a 2-row array whose
    first row is x1 and second row is x2.
    """
    rows = (x1, x2)
    return np.vstack(rows)

In [None]:
# Row 1 of the stacked array, i.e. the second argument (the failure counts).
stack(sucesses, failures)[1]

In [None]:
# Response array produced when the formula's left-hand side calls stack(y, ybar)
# inside braces. NOTE(review): confirm the resulting orientation (2 x n vs n x 2)
# is what the model expects.
np.array(formulae.design_matrices("{stack(y, ybar)} ~ x", df).response)

In [None]:
# NOTE(review): this converts the whole design_matrices result (not just
# .response or .common) to an array — confirm that is intended.
np.array(formulae.design_matrices("{stack(y, ybar)} ~ x", df))

## Roaches

In [None]:
# Load the roaches data from the parent directory and build the design
# matrices. {roach1 / 100} rescales roach1 inside the formula.
# The formula string must have balanced parentheses to parse, so it ends
# after `senior` with no trailing ")".
roaches = pd.read_csv('../roaches.csv')
form = formulae.design_matrices("y ~ {roach1 / 100} + treatment + senior", roaches)
X = np.array(form.common)
y = np.array(form.response)

In [None]:
# Poisson GLM with a log link for the roach counts.
# fit_intercept=False: presumably X already carries the formula's intercept
# column, so the GLM should not add a second one — confirm against `form`.
# save_log_lik=True: the LOO computation later in this section needs the
# log-likelihood draws, matching how the radon models are configured.
r_glm = GLM(fit_intercept=False, family="poisson", link="log", save_log_lik=True)

In [None]:
# Fit the roaches GLM on the design matrix and response built above.
r_glm.fit(X,y)

In [None]:
# Leave-one-out cross-validation estimate for the roaches fit; keep the result
# for the diagnostic plot and display it.
loo1 = az.loo(az.from_cmdstanpy(r_glm.fitted_samples_))
loo1

In [None]:
# Plot the Pareto k-hat diagnostics from loo1, with horizontal reference lines shown.
az.plot_khat(loo1, show_hlines=True)