In [2]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import pystan
import numpy as np
import pandas as pd
import pymc3 as pm
import arviz as az
import scipy as sp
import matplotlib.pyplot as plt

az.style.use('arviz-darkgrid')

# Hard

## 11H1

In [3]:
## From Pymc3 devs notebook:
def stdz(series: pd.Series):
    """Standardize the given pandas Series"""
    return (series - series.mean())/series.std()

hurricanes = pd.read_csv("/Users/benjaminwee/Documents/courses/resources/Rethinking/Data/hurricanes.csv", sep=";")

# Standardize predictor, as usual
hurricanes["femininity"] = stdz(hurricanes.femininity)
hurricanes.tail()

Unnamed: 0,name,year,deaths,category,min_pressure,damage_norm,female,femininity
87,Gustav,2008,52,2,954,4360,0,-1.567471
88,Ike,2008,84,2,950,20370,0,-1.515826
89,Irene,2011,41,1,952,7110,1,0.773725
90,Isaac,2012,5,1,966,24000,0,-1.498613
91,Sandy,2012,159,2,942,75000,1,0.687651


In [4]:
m_11_h1 = '''
data {
  int N;
  real fem[N];
  int deaths[N];
}
parameters {
  real a;
  real b;
}
model {
  vector[N] lambda;
  target += normal_lpdf(a | 0, 4.5);
  target += normal_lpdf(b | 0, 0.8);
  
  for(n in 1:N) lambda[n] = a + b * fem[n];
    target += poisson_log_lpmf(deaths | lambda);
}
'''

In [5]:
sm_h11_1 = pystan.StanModel(model_code=m_11_h1)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9b8d736d7fdd96f9749c582152ee30c5 NOW.


In [6]:
df_h11_1 = dict(N=len(hurricanes),
                fem = hurricanes['femininity'],
                deaths = hurricanes['deaths'])

In [7]:
fit_h11_1 = sm_h11_1.sampling(data=df_h11_1)

In [10]:
fit_h11_1

Inference for Stan model: anon_model_9b8d736d7fdd96f9749c582152ee30c5.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a       3.0  4.4e-4   0.02   2.95   2.99    3.0   3.02   3.05   2893    1.0
b      0.24  4.8e-4   0.03   0.19   0.22   0.24   0.26   0.29   2802    1.0
lp__  -2135    0.02   1.01  -2138  -2136  -2135  -2134  -2134   1887    1.0

Samples were drawn using NUTS at Mon May 25 22:52:52 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

In [11]:
m_11_h1_2 = '''
data {
  int N;
  int deaths[N];
}
parameters {
  real a;
}
model {
  vector[N] lambda;
  target += normal_lpdf(a | 0, 4.5);
  
  for(n in 1:N) lambda[n] = a;
    target += poisson_log_lpmf(deaths | lambda);
}
'''

In [12]:
sm_h11_2 = pystan.StanModel(model_code=m_11_h1_2)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_b9cb89b5ffd8dfc50fa173f2ef609558 NOW.


In [14]:
df_h11_2 = dict(N=len(hurricanes),
                deaths = hurricanes['deaths'])

fit_h11_2 = sm_h11_2.sampling(data=df_h11_2)

In [15]:
fit_h11_2

Inference for Stan model: anon_model_b9cb89b5ffd8dfc50fa173f2ef609558.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a      3.03  5.7e-4   0.02   2.98   3.01   3.03   3.04   3.07   1614    1.0
lp__  -2181    0.02    0.7  -2183  -2181  -2181  -2181  -2181   1682    1.0

Samples were drawn using NUTS at Mon May 25 22:55:37 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

## 11H2

In [18]:
m_11_h2 = '''
data {
  int N;
  real fem[N];
  int deaths[N];
}
parameters {
  real a;
  real b;
  real phi;
}
model {
  vector[N] lambda;
  target += normal_lpdf(a | 0, 4.5);
  target += normal_lpdf(b | 0, 0.8);
  target += exponential_lpdf(phi | 1);
  
  for(n in 1:N) lambda[n] = a + b * fem[n];
    target += neg_binomial_2_lpmf(deaths | lambda, phi);
}
'''

In [19]:
sm_h11_h2 = pystan.StanModel(model_code=m_11_h2)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1d8a9bd5a6209aa2e7f7b2ebc134060b NOW.


In [21]:
df_h11_h2 = dict(N=len(hurricanes),
                fem = hurricanes['femininity'],
                deaths = hurricanes['deaths'])

fit_h11_h2 = sm_h11_h2.sampling(data=df_h11_h2)

fit_h11_h2

Inference for Stan model: anon_model_1d8a9bd5a6209aa2e7f7b2ebc134060b.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a     15.97    0.03   1.86  12.55  14.66  15.89  17.21  19.89   3361    1.0
b      0.37    0.01   0.77  -1.14  -0.15   0.38   0.89   1.86   3221    1.0
phi    0.44  1.1e-3   0.06   0.33    0.4   0.44   0.48   0.57   2961    1.0
lp__ -364.9    0.03   1.23 -368.1 -365.5 -364.6 -364.0 -363.5   1955    1.0

Samples were drawn using NUTS at Mon May 25 23:00:30 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

## 11H3

In [22]:
for col in ["femininity", "min_pressure", "damage_norm"]:
    hurricanes[f"{col}_std"] = stdz(hurricanes[col])

In [83]:
hurricanes['inter'] = hurricanes['femininity_std'] * hurricanes['min_pressure_std']

In [84]:
m_11_h3_1 = '''
data {
  int N;
  int deaths[N];
  real fem[N];
  real min_pressure_std[N];
  real inter[N];
}
parameters {
  real a;
  real bf;
  real bp;
  real bfp;
}
model {
  vector[N] lambda;
  target += normal_lpdf(a | 0, 4.5);
  target += normal_lpdf(bf | 0, 0.8);
  target += normal_lpdf(bp| 0, 0.8);
  target += normal_lpdf(bfp | 0, 0.8);
    
  for(n in 1:N) lambda[n] = a + bf*fem[N]+bp*min_pressure_std[N]+bfp*inter[N];
    target += poisson_log_lpmf(deaths | lambda);
}
'''

In [85]:
sm_h11_h3_1 = pystan.StanModel(model_code=m_11_h3_1)
df_h11_h3_1 = dict(N=len(hurricanes),
                fem = hurricanes['femininity_std'],
                deaths = hurricanes['deaths'],
                inter = hurricanes['inter'],
                min_pressure_std = hurricanes['min_pressure_std'])

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_481729241d3a7e471dd23d86dfb4a56d NOW.


In [86]:
fit_h11_h3_1 = sm_h11_h3_1.sampling(data=df_h11_h3_1)

fit_h11_h3_1


Inference for Stan model: anon_model_481729241d3a7e471dd23d86dfb4a56d.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a      2.74    0.03   1.21   0.33   1.93   2.75   3.53   5.14   1831    1.0
bf     0.08    0.02   0.79  -1.42  -0.45   0.07    0.6   1.66   2227    1.0
bp    -0.14    0.02   0.76  -1.62  -0.65  -0.13   0.36   1.34   2136    1.0
bfp   -0.08    0.02   0.81   -1.7  -0.63  -0.09   0.47   1.54   2278    1.0
lp__  -2185    0.04   1.39  -2188  -2185  -2184  -2184  -2183   1404    1.0

Samples were drawn using NUTS at Mon May 25 23:43:10 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

In [None]:
m_11_h3_2 = '''
data {
  int N;
  int deaths[N];
  real fem[N];
  real min_pressure_std[N];
}
parameters {
  real a;
  real bf;
  real bp;
}
model {
  vector[N] lambda;
  target += normal_lpdf(a | 0, 4.5);
  target += normal_lpdf(bf | 0, 0.8);
  target += normal_lpdf(bp| 0, 0.8);
    
  for(n in 1:N) lambda[n] = a + bf*fem[N]+bp*min_pressure_std[N];
    target += poisson_log_lpmf(deaths | lambda);
}
'''

In [None]:
sm_11_h3_2 = pystan.StanModel(model_code=m_11_h3_2)
df_h11_h3_2 = dict(N=len(hurricanes),
                fem = hurricanes['femininity_std'],
                deaths = hurricanes['deaths'],
                min_pressure_std = hurricanes['min_pressure_std'])




In [None]:
fit_h11_h3_2 = sm_11_h3_2.sampling(data=df_h11_h3_2)

fit_h11_h3_1

In [51]:
m_11_h3_3 = '''
data {
  int N;
  int deaths[N];
  real fem[N];
  real damage_std[N];
  real inter_damage[N];
}
parameters {
  real a;
  real bf;
  real bd;
  real bfd;
}
model {
  vector[N] lambda;
  target += normal_lpdf(a | 0, 4.5);
  target += normal_lpdf(bf | 0, 0.8);
  target += normal_lpdf(bd| 0, 0.8);
  target += normal_lpdf(bfd | 0, 0.8);
    
  for(n in 1:N) lambda[n] = a + bf*fem[N]+bd*damage_std[N]+bfd*inter_damage[N];
    target += poisson_log_lpmf(deaths | lambda);
}
'''

In [55]:
hurricanes['inter_damage'] = hurricanes['femininity_std'] * hurricanes['damage_norm_std']
sm_11_h3_3 = pystan.StanModel(model_code=m_11_h3_3)

df_h11_h3_3 = dict(N=len(hurricanes),
                fem = hurricanes['femininity_std'],
                deaths = hurricanes['deaths'],
                damage_std = hurricanes['damage_norm_std'],
                inter_damage = hurricanes['inter_damage'])




INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2d8e22ab7fdc9a378bed52ba460e491a NOW.


In [56]:
fit_h11_h3_3 = sm_11_h3_3.sampling(data=df_h11_h3_3)

fit_h11_h3_3



Inference for Stan model: anon_model_2d8e22ab7fdc9a378bed52ba460e491a.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a      1.32    0.08   3.43  -5.71  -0.97   1.22    3.7   7.96   2048    1.0
bf     0.03    0.02   0.81  -1.59  -0.52   0.03   0.56   1.65   2663    1.0
bd     0.21    0.01   0.62  -1.03   -0.2   0.21   0.65   1.37   2046    1.0
bfd    0.16    0.01    0.7  -1.21  -0.31   0.16   0.65   1.51   2345    1.0
lp__  -2185    0.04   1.36  -2188  -2185  -2184  -2184  -2183   1270    1.0

Samples were drawn using NUTS at Mon May 25 23:26:20 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

In [59]:
m_11_h3_4 = '''
data {
  int N;
  int deaths[N];
  real fem[N];
  real damage_std[N];
  real inter_damage[N];
}
parameters {
  real a;
  real bf;
  real bd;
  real bfd;
  real phi;
}
model {
  vector[N] lambda;
  target += normal_lpdf(a | 0, 4.5);
  target += normal_lpdf(bf | 0, 0.8);
  target += normal_lpdf(bd| 0, 0.8);
  target += normal_lpdf(bfd | 0, 0.8);
  target += exponential_lpdf(phi | 1);
    
  for(n in 1:N) lambda[n] = a + bf*fem[N]+bd*damage_std[N]+bfd*inter_damage[N];
    target += neg_binomial_2_lpmf(deaths | lambda, phi);
}
'''


In [60]:
sm_11_h3_4 = pystan.StanModel(model_code=m_11_h3_4)

df_h11_h3_4 = dict(N=len(hurricanes),
                fem = hurricanes['femininity_std'],
                deaths = hurricanes['deaths'],
                damage_std = hurricanes['damage_norm_std'],
                inter_damage = hurricanes['inter_damage'])



INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_a9698d5beede2c677c71616af9286b6b NOW.


In [61]:
fit_h11_h3_4 = sm_11_h3_3.sampling(data=df_h11_h3_4)

fit_h11_h3_4



Inference for Stan model: anon_model_2d8e22ab7fdc9a378bed52ba460e491a.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a      1.31    0.09   3.46  -5.48  -1.02   1.31   3.59   8.17   1611    1.0
bf     0.02    0.02   0.79  -1.49   -0.5   0.02   0.55   1.58   2353    1.0
bd     0.24    0.02   0.65  -1.02  -0.19   0.22   0.67   1.51   1562    1.0
bfd    0.13    0.02   0.75  -1.37  -0.39   0.15   0.65   1.61   2294    1.0
lp__  -2185    0.05    1.5  -2188  -2185  -2184  -2184  -2183   1016    1.0

Samples were drawn using NUTS at Mon May 25 23:28:09 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

## 11H6

In [64]:
fish = pd.read_csv("/Users/benjaminwee/Documents/courses/resources/Rethinking/Data/fish.csv", sep=";")
fish['log_hours'] = np.log(fish['hours'])
fish.head()

Unnamed: 0,fish_caught,livebait,camper,persons,child,hours,log_hours
0,0,0,0,1,0,21.124,3.05041
1,0,1,1,1,0,5.732,1.746065
2,0,1,0,1,0,1.323,0.279902
3,0,1,1,2,1,0.548,-0.60148
4,1,1,0,1,0,1.695,0.527683


In [69]:
m_11_h6_1 = '''
data {
  int N;
  real log_hours[N];
  int fish[N];
}
parameters {
  real ap;
  real al;
}
model {
  vector[N] p;
  vector[N] lambda;
  target += normal_lpdf(ap | 0, 10);
  target += normal_lpdf(al | 0, 10);
  
  for(i in 1:N){
    p[i] = inv_logit(ap);
    lambda[i] = exp(log_hours[i] + al);
    if(fish[i] == 0)
      target += log_sum_exp(bernoulli_lpmf(1 | p[i]), 
                            bernoulli_lpmf(0 | p[i]) + poisson_lpmf(fish[i] | lambda[i]));
    else
      target += bernoulli_lpmf(0 | p[i]) + poisson_lpmf(fish[i] | lambda[i]);
  }
}
'''


In [71]:
sm_11_h6_1 = pystan.StanModel(model_code=m_11_h6_1)

df_h11_h6_1 = dict(N=len(fish),
                log_hours = fish['log_hours'],
                fish = fish['fish_caught'])


INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_dbe90a9b52718a8ad777decfaa1d6a1b NOW.


In [72]:
fit_h11_h6_1 = sm_11_h6_1.sampling(data=df_h11_h6_1)

fit_h11_h6_1

Inference for Stan model: anon_model_dbe90a9b52718a8ad777decfaa1d6a1b.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
ap    -0.75  3.0e-3   0.18  -1.11  -0.87  -0.75  -0.63   -0.4   3654    1.0
al    -0.14  5.6e-4   0.04  -0.21  -0.17  -0.15  -0.12  -0.07   3934    1.0
lp__  -1264    0.02   0.96  -1267  -1264  -1264  -1263  -1263   1827    1.0

Samples were drawn using NUTS at Mon May 25 23:37:53 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

In [79]:
m_11_h6_2 = '''
data {
  int N;
  real log_hours[N];
  int fish[N];
  int persons[N];
  int child[N];
}
parameters {
  real ap;
  real al;
  real bp0;
  real bc0;
  real bp;
  real bc;
}
model {
  vector[N] p;
  vector[N] lambda;
  target += normal_lpdf(ap | 0, 10);
  target += normal_lpdf(al | 0, 10);
  target += normal_lpdf(bp0 | 0, 1);
  target += normal_lpdf(bc0 | 0, 1);
  target += normal_lpdf(bp| 0, 1);
  target += normal_lpdf(bc | 0, 1);
  
  for(i in 1:N){
    p[i] = inv_logit(ap + bp0*persons[i] + bc0*child[i]);
    lambda[i] = exp(log_hours[i] + al + bp*persons[i] + bc*child[i]);
    if(fish[i] == 0)
      target += log_sum_exp(bernoulli_lpmf(1 | p[i]), 
                            bernoulli_lpmf(0 | p[i]) + poisson_lpmf(fish[i] | lambda[i]));
    else
      target += bernoulli_lpmf(0 | p[i]) + poisson_lpmf(fish[i] | lambda[i]);
  }
}
'''


In [80]:
sm_11_h6_2 = pystan.StanModel(model_code=m_11_h6_2)

df_h11_h6_2 = dict(N=len(fish),
                  log_hours = fish['log_hours'],
                  fish = fish['fish_caught'],
                  child = fish['child'],
                  persons = fish['persons'])


INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_c32a3ce4c02e39ec64c11fedc2fb2436 NOW.


In [82]:
fit_h11_h6_2 = sm_11_h6_2.sampling(data=df_h11_h6_2)

fit_h11_h6_2

Inference for Stan model: anon_model_c32a3ce4c02e39ec64c11fedc2fb2436.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
ap     0.76    0.01   0.55   -0.3   0.39   0.76   1.13   1.82   1839    1.0
al     -2.3  3.5e-3   0.15   -2.6  -2.41   -2.3   -2.2  -2.02   1837    1.0
bp0   -1.01  6.3e-3   0.27  -1.56  -1.18   -1.0  -0.83  -0.52   1820    1.0
bc0    1.01    0.01   0.56  -0.26   0.68   1.05   1.38   1.95   2558    1.0
bp     0.67  9.9e-4   0.04   0.59   0.65   0.67    0.7   0.76   1794    1.0
bc     0.56  1.8e-3   0.09   0.37    0.5   0.56   0.62   0.74   2720    1.0
lp__  -1050    0.05   1.82  -1054  -1051  -1050  -1049  -1047   1406    1.0

Samples were drawn using NUTS at Mon May 25 23:40:44 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
co