In [1]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import scipy.stats as stats
import seaborn as sns
import pandas as pd

from scipy import stats
# logistic (or inverse-logit) is the inverse of the logit function
from scipy.special import expit as logistic

import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

from causalgraphicalmodels import CausalGraphicalModel
import daft



This notebook is based on the [homework of week 8](https://github.com/rmcelreath/statrethinking_winter2019/blob/master/homework/week08.pdf) of the Statistical Rethinking book material.
I work through the homework questions, but not necessarily in their original order.
<br>
<br>
The dataset can be found [here](https://github.com/rmcelreath/rethinking/blob/Experimental/data/)
<br>

Problem 3)
<br>
Return to the Trolley data, data(Trolley), from Chapter 12. Define and fit a varying intercepts model for these data. By this I mean to add an intercept parameter for the individual to the linear model. Cluster the varying intercepts on individual participants, as indicated by the unique values in the id variable. Include action, intention, and contact as before. Compare the varying intercepts model and a model that ignores individuals, using both WAIC/LOO and posterior predictions. What's the impact of individual variation in these data? 

This dataset concerns the famous [Trolley Dilemma](https://www.iflscience.com/editors-blog/trolley-dilemma-would-you-kill-one-person-save-five/).

In [2]:
# The Trolley file is semicolon-separated, hence the explicit `sep`.
df = pd.read_csv(
    '../data/Trolley.csv',
    sep=';',
)
df

Unnamed: 0,case,response,order,id,age,male,edu,action,intention,contact,story,action2
0,cfaqu,4,2,96;434,14,0,Middle School,0,0,1,aqu,1
1,cfbur,3,31,96;434,14,0,Middle School,0,0,1,bur,1
2,cfrub,4,16,96;434,14,0,Middle School,0,0,1,rub,1
3,cibox,3,32,96;434,14,0,Middle School,0,1,1,box,1
4,cibur,3,4,96;434,14,0,Middle School,0,1,1,bur,1
...,...,...,...,...,...,...,...,...,...,...,...,...
9925,ilpon,3,23,98;299,66,1,Graduate Degree,0,1,0,pon,0
9926,ilsha,6,15,98;299,66,1,Graduate Degree,0,1,0,sha,0
9927,ilshi,7,7,98;299,66,1,Graduate Degree,0,1,0,shi,0
9928,ilswi,2,18,98;299,66,1,Graduate Degree,0,1,0,swi,0


In [3]:
# Integer code per participant `id`, used below to index the per-individual
# intercepts in m2.
df['id_n'] = pd.factorize(df['id'])[0]

## m1: Linear Model

In [4]:
# m1: ordered-logit regression of `response` on action, intention and contact,
# with a single shared set of cutpoints and no participant-level term.
with pm.Model() as m1:
    A = pm.Data('A', df.action.values)
    I = pm.Data('I', df.intention.values)
    C = pm.Data('C', df.contact.values)

    bA = pm.Normal('bA', 0, 0.5)
    bI = pm.Normal('bI', 0, 0.5)
    bC = pm.Normal('bC', 0, 0.5)
    bIA = pm.Normal('bIA', 0, 0.5)
    bIC = pm.Normal('bIC', 0, 0.5)

    # One cutpoint between each pair of adjacent response levels.
    n_cutpoints = len(df.response.unique()) - 1
    cutpoints = pm.Normal('cutpoints', 0, 1.5,
                          transform=pm.distributions.transforms.ordered,
                          shape=n_cutpoints,
                          # Increasing start values centred on zero, sized from
                          # n_cutpoints so they always agree with `shape`
                          # (for 6 cutpoints this is -2.5, -1.5, ..., 2.5).
                          testval=np.arange(n_cutpoints) - (n_cutpoints - 1) / 2)

    # The slope on intention is itself a function of action and contact
    # (the two interaction terms).
    BI = bI + bIA*A + bIC*C
    phi = bA*A + bC*C + BI*I

    # Subtract 1 so the observed categories are zero-based
    # (assumes `response` is coded starting at 1 -- TODO confirm).
    response = pm.OrderedLogistic('response', phi, cutpoints, observed=df.response.values-1)

    # Fixed seed so that "Restart & Run All" reproduces the same draws.
    m1_trace = pm.sample(random_seed=8927)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [cutpoints, bIC, bIA, bC, bI, bA]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 104 seconds.


## m2: Model with varying intercepts

In [5]:
# m2: same ordered-logit regression as m1, plus a varying intercept per
# participant (`bid`) whose spread `bid_sig` is estimated from the data.
with pm.Model() as m2:
    A = pm.Data('A', df.action.values)
    I = pm.Data('I', df.intention.values)
    C = pm.Data('C', df.contact.values)

    bA = pm.Normal('bA', 0, 0.5)
    bI = pm.Normal('bI', 0, 0.5)
    bC = pm.Normal('bC', 0, 0.5)
    bIA = pm.Normal('bIA', 0, 0.5)
    bIC = pm.Normal('bIC', 0, 0.5)

    # Partial pooling: one intercept per participant, all drawn from a common
    # Normal(0, bid_sig) whose standard deviation is itself a parameter.
    bid_sig = pm.Exponential('bid_sig', 0.5)
    bid = pm.Normal('bid', 0, bid_sig, shape=len(df.id_n.unique()))

    # One cutpoint between each pair of adjacent response levels.
    n_cutpoints = len(df.response.unique()) - 1
    cutpoints = pm.Normal('cutpoints', 0, 1.5,
                          transform=pm.distributions.transforms.ordered,
                          shape=n_cutpoints,
                          # Increasing start values centred on zero, sized from
                          # n_cutpoints so they always agree with `shape`
                          # (for 6 cutpoints this is -2.5, -1.5, ..., 2.5).
                          testval=np.arange(n_cutpoints) - (n_cutpoints - 1) / 2)

    # The slope on intention is itself a function of action and contact
    # (the two interaction terms).
    BI = bI + bIA*A + bIC*C
    # `id_n` selects, for every row, the intercept of the participant who
    # gave that response.
    phi = bid[df.id_n.values] + bA*A + bC*C + BI*I

    # Subtract 1 so the observed categories are zero-based
    # (assumes `response` is coded starting at 1 -- TODO confirm).
    response = pm.OrderedLogistic('response', phi, cutpoints, observed=df.response.values-1)

    # Fixed seed so that "Restart & Run All" reproduces the same draws.
    # NOTE(review): the recorded run warned that some parameters had fewer than
    # 200 effective samples; check the diagnostics before relying on them.
    m2_trace = pm.sample(random_seed=8927)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [cutpoints, bid, bid_sig, bIC, bIA, bC, bI, bA]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 180 seconds.
The estimated number of effective samples is smaller than 200 for some parameters.


In [6]:
# Posterior means, SDs and HDIs for m1; the '~' prefix leaves out the cutpoints.
az.summary(
    m1_trace,
    var_names=['~cutpoints'],
    kind='stats',
    round_to=2,
)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%
bA,-0.47,0.05,-0.57,-0.37
bI,-0.29,0.06,-0.41,-0.19
bC,-0.34,0.07,-0.47,-0.21
bIA,-0.44,0.08,-0.59,-0.29
bIC,-1.24,0.1,-1.42,-1.05


In [7]:
# Posterior means, SDs and HDIs for m2; the '~' prefix leaves out the cutpoints
# and the per-participant intercepts so only the slopes and bid_sig are shown.
az.summary(
    m2_trace,
    var_names=['~cutpoints', '~bid'],
    kind='stats',
    round_to=2,
)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%
bA,-0.65,0.05,-0.75,-0.55
bI,-0.38,0.06,-0.51,-0.28
bC,-0.45,0.07,-0.58,-0.32
bIA,-0.56,0.08,-0.71,-0.41
bIC,-1.67,0.1,-1.86,-1.48
bid_sig,1.92,0.08,1.78,2.09


All the coefficients, **bA**, **bI**, **bC**, **bIA**, **bIC**, are negative in both models, and each is larger in absolute value in Model 2.
<br><br>
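**bid_sig**, the standard deviation of the individual intercepts, is large (posterior mean of about 1.92), which means responses vary considerably from one individual to another.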

In [8]:
# Rank the pooled model (m1) against the varying-intercepts model (m2) by WAIC.
az.compare(
    {'model_1': m1_trace, 'model_2': m2_trace},
    ic='waic',
)

The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive accuracy.
See http://arxiv.org/abs/1507.04544 for details


Unnamed: 0,rank,waic,p_waic,d_waic,weight,se,dse,warning,waic_scale
model_2,0,-15528.9,356.322,0.0,,38.5204,0.0,True,log
model_1,1,-18464.5,10.8713,2935.64,0.0,89.7209,86.7409,False,log


Comparing the WAIC of both models, it is clear that the model that accounts for individual participants, Model 2, is far better than the model that doesn't, Model 1.

---
---

In [9]:
# Print the Python/IPython versions and the versions of the imported packages
# so the environment behind these results is recorded with the notebook.
%load_ext watermark
%watermark -iv -v -nuw

Last updated: Mon Apr 12 2021

Python implementation: CPython
Python version       : 3.8.5
IPython version      : 7.19.0

pandas    : 1.0.5
numpy     : 1.19.1
daft      : 0.1.0
matplotlib: 3.3.2
seaborn   : 0.11.0
scipy     : 1.5.4
arviz     : 0.10.0
pymc3     : 3.9.3

Watermark: 2.1.0

