# Introduction

In this notebook, we will look at how hierarchical models can help us in scenarios where we do not have sufficient data to meaningfully quantify our uncertainties. By making assumptions about relatedness, we can borrow statistical power from other observations.

In [None]:
import pandas as pd
import janitor as jn
import pymc3 as pm

# Reload imported modules automatically before each cell runs, so edits to
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inside the notebook, using the high-resolution
# ('retina') inline backend format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Read the 2012 finch beak measurements and tidy them in a single chain:
# normalise the column names, give the two beak measurements descriptive
# names, and encode the species column (presumably as a pandas categorical --
# see the pyjanitor docs for `encode_categorical`).
df = (
    jn.DataFrame(pd.read_csv('../data/finch_beaks_2012.csv'))
    .clean_names()
    .rename_column('blength', 'beak_length')
    .rename_column('bdepth', 'beak_depth')
    .encode_categorical('species')
)

# Number of birds recorded for each species.
df.groupby('species').size()

In [None]:
# Summary statistics of beak depth, one row per species.
(
    df
    .groupby('species')['beak_depth']
    .describe()
)

In [None]:
# Boolean row masks, one per species label, built in a single pass over the
# labels we care about.
fortis_filter, scandens_filter, unknown_filter = (
    df['species'] == label
    for label in ('fortis', 'scandens', 'unknown')
)

In [None]:
def enc_species(x):
    """Map a species label to an integer code.

    'fortis' maps to 0, 'scandens' maps to 1, and every other value maps
    to 2.
    """
    known_species = ('fortis', 'scandens')
    # The position in `known_species` is the code; anything that matches
    # neither label falls through to the next free integer.
    for code, label in enumerate(known_species):
        if x == label:
            return code
    return len(known_species)

In [None]:
# Integer code for each bird's species; `enc_species` is passed directly
# since wrapping it in a lambda adds nothing.
df['species_enc'] = df['species'].apply(enc_species)
df['species_enc'].head(5)

In [None]:
# Rows belonging to each species group. Each subset supplies both the integer
# index into the per-species parameter vectors and the observed beak depths.
fortis_rows = df[fortis_filter]
scandens_rows = df[scandens_filter]
unknown_rows = df[unknown_filter]

with pm.Model() as beak_depth_model:
    # Hyperpriors shared across species: the scale of the prior on the
    # per-species means, and the scale of the prior on the per-species
    # standard deviations.
    sd_prior = pm.HalfCauchy('sd_prior', beta=100)
    beta_prior = pm.HalfCauchy('beta_prior', beta=100)

    # One mean and one standard deviation per species code (three codes).
    finches_mean = pm.HalfNormal('finches_mean', sd=sd_prior, shape=(3,))
    finches_sd = pm.HalfCauchy('finches_sd', beta=beta_prior, shape=(3,))

    # Pick out, for every row of a group, the parameter of its species code.
    fortis_mean = pm.Deterministic(
        'fortis_mean', finches_mean[fortis_rows['species_enc']])
    scandens_mean = pm.Deterministic(
        'scandens_mean', finches_mean[scandens_rows['species_enc']])

    fortis_sd = pm.Deterministic(
        'fortis_sd', finches_sd[fortis_rows['species_enc']])
    scandens_sd = pm.Deterministic(
        'scandens_sd', finches_sd[scandens_rows['species_enc']])

    unknown_mean = pm.Deterministic(
        'unknown_mean', finches_mean[unknown_rows['species_enc']])
    unknown_sd = pm.Deterministic(
        'unknown_sd', finches_sd[unknown_rows['species_enc']])

    # Degrees of freedom shared by all likelihoods; the +1 shift keeps the
    # value at or above 1.
    nu = pm.Exponential('nu', lam=1/29.) + 1

    # Student-T likelihood of the observed beak depths, one per group.
    like_fortis = pm.StudentT(
        'like_fortis', mu=fortis_mean, sd=fortis_sd, nu=nu,
        observed=fortis_rows['beak_depth'])
    like_scandens = pm.StudentT(
        'like_scandens', mu=scandens_mean, sd=scandens_sd, nu=nu,
        observed=scandens_rows['beak_depth'])
    like_unknown = pm.StudentT(
        'like_unknown', mu=unknown_mean, sd=unknown_sd, nu=nu,
        observed=unknown_rows['beak_depth'])

In [None]:
with beak_depth_model:
    # Draw 2000 posterior samples per chain with NUTS. The raised
    # target acceptance rate (0.95) makes the sampler take smaller steps.
    # The fixed seed makes the draws reproducible across
    # Restart & Run All; change it to obtain a different realisation.
    trace = pm.sample(2000, nuts_kwargs={'target_accept': 0.95}, random_seed=42)

In [None]:
# Posterior distribution of the three per-species mean beak depths.
pm.plot_posterior(
    trace,
    varnames=['finches_mean'],
)