# Radon case study

This case study closely follows [Bambi's radon example](https://bambinos.github.io/bambi/main/notebooks/radon_example.html#Multilevel-and-hierarchical-models).

In [None]:
from scikit_stan import GLM
import pandas as pd
import numpy as np
import formulae
import arviz as az

In [None]:
# Download the two CSV files of the radon case study from the stan-dev
# example-models repository: the radon measurements and the county table.
radon = pd.read_csv(
    "https://raw.githubusercontent.com/stan-dev/example-models/"
    "cmdstanpy-case-study-1/jupyter/radon/data/mn_radon.csv"
)
counties = pd.read_csv(
    "https://raw.githubusercontent.com/stan-dev/example-models/"
    "cmdstanpy-case-study-1/jupyter/radon/data/mn_counties.csv"
)

In [None]:
# Recode the numeric floor indicator as readable labels: 0 -> "Basement", 1 -> "Floor".
radon["floor"] = radon["floor"].map(
    {
        0: "Basement",
        1: "Floor",
    }
)
# Strip leading/trailing whitespace from every county name.
radon["county"] = radon["county"].map(str.strip)

In [None]:
# Preview the first rows of the radon table after the recoding above.
radon.head()

## Complete pooling

In [None]:
# Complete pooling: county does not appear in the formula, so log_radon is
# modelled from floor alone. The leading `0 +` removes the intercept term.
form = formulae.design_matrices("log_radon ~ 0 + floor", radon)
# Convert the common-effects design matrix and the response to NumPy arrays,
# which are what gets passed to GLM.fit below.
X = np.array(form.common)
y = np.array(form.response)
# Last expression of the cell: display the design-matrices object.
form

In [None]:
# The design matrix was built without an intercept (`0 +` in the formula),
# so the GLM is configured not to add one of its own.
complete_model = GLM(fit_intercept=False)

In [None]:
# Fit the complete-pooling model on the design matrix X and response y.
complete_model.fit(X, y)

In [None]:
# Trace plots for the complete-pooling fit, limited to the beta and sigma variables.
az.plot_trace(
    az.from_cmdstanpy(complete_model.fitted_samples_),
    var_names=["beta", "sigma"],
    compact=True,
)

## No pooling

In [None]:
# No pooling: the only term is the county:floor interaction, again with the
# intercept removed by the leading `0 +`.
# NOTE: form, X and y are rebound here and replace the complete-pooling versions.
form = formulae.design_matrices("log_radon ~ 0 + county:floor", radon)
# Convert the common-effects design matrix and the response to NumPy arrays.
X = np.array(form.common)
y = np.array(form.response)
# Last expression of the cell: display the design-matrices object.
form

In [None]:
# Separate GLM instance for the no-pooling model; no intercept is added because
# the formula already omits it.
unpooled = GLM(fit_intercept=False)

In [None]:
# Fit the no-pooling model on the county:floor design matrix and the response.
unpooled.fit(X, y)

In [None]:
# Trace plots for the no-pooling fit, limited to the beta and sigma variables.
az.plot_trace(
    az.from_cmdstanpy(unpooled.fitted_samples_),
    var_names=["beta", "sigma"],
    compact=True,
)

In [None]:
# Convert the no-pooling fit into an ArviZ InferenceData object.
infdata = az.from_cmdstanpy(unpooled.fitted_samples_)
# Replace the 'beta_dim_0' coordinate with the design-matrix column names,
# presumably so each coefficient is labelled by its term in later plots.
# NOTE(review): assumes `form` is still the no-pooling design built above and
# that 'beta_dim_0' is the name ArviZ assigned to beta's dimension — confirm.
infdata.posterior['beta_dim_0'] = form.common.as_dataframe().columns

In [None]:
# Forest plot of the beta coefficients from the no-pooling fit, with chains
# combined and r_hat shown; the tall figure leaves room for the many rows.
az.plot_forest(
    data=infdata,
    var_names=["beta"],
    figsize=(6, 32),
    r_hat=True,
    combined=True,
    textsize=8,
)

## Partial pooling

Open question: does partial pooling need group-level data passed to `fit`, or a separate model class? rstanarm uses a GLMER-style model (`stan_glmer`) here.

In [None]:
# Display the group-specific part of the design for a formula with a
# county-level varying intercept, `(1|county)`.
formulae.design_matrices("log_radon ~ 1 + (1|county)", radon).group