In [None]:
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
from data import load_finches_2012, load_finches_1975
from utils import ECDF

# Re-import modules automatically before each cell executes, so edits to the
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook, at high-DPI ("retina")
# resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# 2012 survey: derive the beak shape ratio (depth / length), then keep only
# the birds whose species is known.
df12 = load_finches_2012()
df12 = df12.assign(shape=df12['beak_depth'] / df12['beak_length'])
df12 = df12.loc[df12['species'] != 'unknown']

# 1975 survey, loaded as-is.
df75 = load_finches_1975()

# Short alias for the 2012 frame, used by the cells below.
df = df12

In [None]:
# Preview the first five rows of the cleaned 2012 data.
df12.head(n=5)

In [None]:
# Index labels of the rows belonging to each species.
is_fortis = (df['species'] == 'fortis').values
is_scandens = (df['species'] == 'scandens').values

fortis_idx = df.index[is_fortis]
scandens_idx = df.index[is_scandens]

In [None]:
# Mega-model incorporating shape as well. 
# We will also analyze the SD in addition to the mean.

with pm.Model() as beak_model:
    # Hyperpriors shared across species. Element 0 feeds the beak-depth
    # parameters and element 1 the beak-length parameters (see below).
    # Scales can only be positive, hence the half-distributions.
    sd_hyper = pm.HalfCauchy('sd_hyper', beta=100, shape=(2,))
    beta_hyper = pm.HalfCauchy('beta_hyper', beta=100, shape=(2,))
    
    # Beak measurements cannot have a negative mean, so a HalfNormal is a
    # reasonable constrained prior. Each parameter is a length-2 vector that
    # is indexed by `species_enc` in the likelihoods.
    # NOTE(review): presumably one entry per species, encoded 0/1 — confirm
    # against the data loader.
    mean_depth = pm.HalfNormal('mean_depth', sd=sd_hyper[0], shape=(2,))
    sd_depth = pm.HalfCauchy('sd_depth', beta=beta_hyper[0], shape=(2,))
    
    mean_length = pm.HalfNormal('mean_length', sd=sd_hyper[1], shape=(2,))
    sd_length = pm.HalfCauchy('sd_length', beta=beta_hyper[1], shape=(2,))

    # Degrees of freedom shared by both likelihoods. The exponential draw is
    # shifted by one, so nu is always at least 1.
    nu = pm.Exponential('nu', lam=1/29.) + 1
    
    # Likelihoods: Student-t for each measurement, with the mean and scale
    # picked per row via that row's species code.
    depth = pm.StudentT('depth', 
                        nu=nu,
                        mu=mean_depth[df['species_enc']], 
                        sd=sd_depth[df['species_enc']], 
                        observed=df['beak_depth'])
    
    length = pm.StudentT('length',
                         nu=nu,
                         mu=mean_length[df['species_enc']],
                         sd=sd_length[df['species_enc']],
                         observed=df['beak_length'])
    
    # Beak shape tracked as the ratio depth / length.
    # NOTE(review): `depth` and `length` are both observed variables, so this
    # ratio presumably reflects the observed data rather than a posterior
    # quantity — confirm this is the intended definition.
    shape = pm.Deterministic('shape', depth / length)

In [None]:
# Draw 2000 posterior samples from the beak model.
# The seed is fixed so that a fresh "Restart & Run All" reproduces the same
# trace (and therefore the same figures) instead of a new random run.
with beak_model:
    trace = pm.sample(2000, random_seed=42)

In [None]:
# Sampler traces and marginal posteriors for the mean parameters.
pm.traceplot(
    trace,
    varnames=['mean_length', 'mean_depth'],
)

In [None]:
# Sampler traces and marginal posteriors for the scale parameters.
pm.traceplot(
    trace,
    varnames=['sd_length', 'sd_depth'],
)

In [None]:
# Simulate replicated observations from the fitted model (posterior
# predictive samples). Seeded so the simulated data are reproducible.
samples = pm.sample_ppc(trace, model=beak_model, random_seed=42)

# Show only the array shapes; echoing `samples` itself would dump every
# simulated value into the notebook output.
{name: draws.shape for name, draws in samples.items()}

In [None]:
# Posterior predictive check of beak shape (depth / length) for fortis.
#
# `samples['depth']` and `samples['length']` are NumPy arrays indexed by
# position, while `fortis_idx` holds index *labels* of `df`. After the
# 'unknown' rows are filtered out, labels and positions need not agree, so
# the labels are converted to row positions before slicing the arrays.
# NOTE(review): assumes the sample columns follow the row order of `df`
# (the model's observed data) — confirm.
fortis_pos = np.flatnonzero(df.index.isin(fortis_idx))
ppc_shape = samples['depth'][:, fortis_pos] / samples['length'][:, fortis_pos]

fig, ax = plt.subplots()
x, y = ECDF(ppc_shape.flatten())
ax.plot(x, y, label='posterior predictive')
# The observed values live in a DataFrame, so label-based lookup is correct.
x, y = ECDF(df.loc[fortis_idx, 'shape'])
ax.plot(x, y, label='data')
ax.set(xlabel='beak shape (depth / length)',
       ylabel='cumulative fraction',
       title='fortis: posterior predictive vs. data')
ax.legend();

In [None]:
# Empirical CDF of the observed beak shape (depth / length) for all birds.
fig, ax = plt.subplots()
x, y = ECDF(df['shape'])
ax.plot(x, y, label='data')
# TODO: posterior overlay is currently disabled; left here for reference.
# x, y = ECDF(trace['shape'][0, :])
# ax.plot(x, y, label='posterior')
ax.set(xlabel='beak shape (depth / length)',
       ylabel='cumulative fraction',
       title='Observed beak shape')
# Without a legend the `label=` passed to plot() is never displayed.
ax.legend();
