In [None]:
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

# For deterministic reproducibility.
# This seeds NumPy's global random state only; code that keeps its own RNG is unaffected.
np.random.seed(42)

# Load IPython's autoreload extension; mode 2 reloads all imported modules before
# each cell executes, so edits to local .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Draw matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for high-resolution ("retina") figures.
%config InlineBackend.figure_format = 'retina'

In [None]:
# Build the modelling table in a single pipeline:
#   1. read the batting table,
#   2. turn zero `AB` values into NaN so those rows are removed by `dropna()`
#      (note: `dropna()` also removes rows with a NaN in any other column),
#   3. add `batting_avg` = H / AB,
#   4. keep rows with yearID >= 2016 and take the first 15 of them.
df = (
    pd.read_csv('../data/baseballdb/core/Batting.csv')
    .assign(AB=lambda batting: batting['AB'].replace(0, np.nan))
    .dropna()
    .assign(batting_avg=lambda batting: batting['H'] / batting['AB'])
    .loc[lambda batting: batting['yearID'] >= 2016]
    .iloc[0:15]
)
df.head(5)

In [None]:
# Display the full modelling table (at most 15 rows after the `iloc[0:15]` slice).
df

In [None]:
import theano.tensor as tt

# Hierarchical beta-binomial model: each row of `df` gets its own success
# probability, and all of those probabilities share a single Beta prior whose
# parameters are themselves given priors.
with pm.Model() as baseball_model:

    # Mean of the shared Beta prior: alpha / (alpha + beta) == phi.
    phi = pm.Uniform(
        'phi',
        lower=0.0,
        upper=1.0,
    )

    # Concentration of the shared Beta prior: alpha + beta == kappa.
    # It is sampled on the log scale and exponentiated, so kappa >= 1.
    kappa_log = pm.Exponential('kappa_log', lam=1.5)
    kappa = pm.Deterministic('kappa', tt.exp(kappa_log))

    # One success probability per row of `df`.
    thetas = pm.Beta(
        'thetas',
        alpha=phi * kappa,
        beta=(1.0 - phi) * kappa,
        shape=df.shape[0],
    )

    # Likelihood: `H` successes out of `AB` trials for each row.
    like = pm.Binomial(
        'like',
        n=df['AB'],
        p=thetas,
        observed=df['H'],
    )

In [None]:
# Draw 2000 posterior samples per chain with NUTS.
# - `target_accept=0.95` (above the default) makes the sampler take smaller steps.
# - `random_seed` is passed explicitly so this cell yields the same draws every
#   time it is run, regardless of how much of NumPy's global random state earlier
#   cells (or earlier runs of this cell) have consumed.
# NOTE(review): `nuts_kwargs` is deprecated in newer PyMC3 releases in favour of
# passing `target_accept=0.95` directly — confirm the pinned version before changing.
with baseball_model:
    trace = pm.sample(
        2000,
        nuts_kwargs={'target_accept': 0.95},
        random_seed=42,
    )

In [None]:
# Posterior densities and sampling traces for every variable in the model.
# The trailing semicolon stops the notebook from echoing the returned axes repr.
pm.traceplot(trace);

In [None]:
# Label each row of the forest plot with the counts behind its estimate.
# `AB` was upcast to float when zeros were replaced by NaN during loading, so the
# counts are cast back to int first; otherwise labels read "AB: 4.0" rather than
# "AB: 4". The cast is safe because rows containing NaN were already dropped.
ylabels = (
    "AB: " + df['AB'].astype(int).astype(str)
    + ', H: ' + df['H'].astype(int).astype(str)
)
# The trailing semicolon suppresses the repr of the returned plot object.
pm.forestplot(trace, varnames=['thetas'], ylabels=ylabels);

With a hierarchical model, we make the assumption that our observations (or treatments that group our observations) are somehow related. Under this assumption, when we have a new sample for which we have very few observations, we are able to borrow statistical strength from the population to make inferences about the new sample.

Depending on the scenario, this assumption can either be reasonable, thereby not necessitating much debate, or be considered a "strong assumption", thereby requiring strong justification. 

# Summary

- Hierarchical modelling assumes a "hierarchical structure" that governs relatedness between our observed samples.
- When the assumption of hierarchical structure holds, we avoid estimates that one may consider absurd (e.g. a long-run batting probability estimated to be zero or one on the basis of only a few observations).