In [5]:
'''
Eating disorders Montecatini
Author: Corrado Caudek

PURPOSE

HDDMrl analysis of the PRL experiment, in which the manipulated factor
was the image type (food vs. neutral images).
The HDDMrl parameters are estimated separately for each participant; the
recovered parameters are then used to classify the participants into
patients and controls.
'''

import datetime

# Time-stamp the run: a header line followed by the formatted date/time.
now = datetime.datetime.now()
print("Current date and time : ", now.strftime("%Y-%m-%d %H:%M:%S"), sep="\n")

Current date and time : 
2022-06-10 15:28:17


In [6]:
# MODULE IMPORTS ----

# Warning settings: ignore FutureWarning messages. The filter is installed
# before the remaining imports, so it is already active while they load.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Data management
import pandas as pd
import numpy as np
import pickle
import kabuki

from patsy import dmatrix
from kabuki.analyze import gelman_rubin
from kabuki.utils import concat_models
import pathlib

# Plotting
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

# Stats functionality
from statsmodels.distributions.empirical_distribution import ECDF

# HDDM: report the versions of the modelling libraries used for this run.
import hddm
print('hddm version:', hddm.__version__)
print('kabuki version:', kabuki.__version__)
# Python version of the running interpreter
from platform import python_version
print('Python version:', python_version())

hddm version: 0.8.0
kabuki version: 0.6.5
Python version: 3.6.10


In [145]:
# Show the working directory against which the relative file names in this
# notebook are resolved. Plain Python is used instead of `pwd()`, which only
# works through IPython's automagic and fails in an exported script.
str(pathlib.Path.cwd())

'/Users/corrado/_repositories/eating_disorders_montecatini/scripts/python/PRL/classification'

In [7]:
# READ DATA ----
# Input file name, resolved relative to the current working directory.
data_file = 'input_hddmrl_balanced_an_hc.csv'
data = hddm.load_csv(data_file)
# Preview the first rows of the loaded table.
data.head()

Unnamed: 0,subj_idx,response,stim,rt,trial,split_by,feedback,diag_cat,subj_code,q_init
0,0,0,food,0.979,1,0,0,AN,ca_po_2002_05_25_700_f,0.5
1,0,0,neutral,1.553,1,1,0,AN,ca_po_2002_05_25_700_f,0.5
2,0,1,food,1.939,2,0,0,AN,ca_po_2002_05_25_700_f,0.5
3,0,1,neutral,0.35,2,1,1,AN,ca_po_2002_05_25_700_f,0.5
4,0,0,food,0.768,3,0,0,AN,ca_po_2002_05_25_700_f,0.5


In [8]:
# Distinct participant codes and participant indices found in the data.
vec_subj_code, vec_subj_idx = (
    data[column].unique() for column in ('subj_code', 'subj_idx')
)

In [9]:
# Show the distinct participant indices present in the data.
print(vec_subj_idx)

[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215]


In [10]:
# Select one participant (subj_idx == 1) and report the size of its table.
is_selected = data['subj_idx'] == 1
one_subj = data.loc[is_selected]
one_subj.shape

(320, 10)

In [150]:
# Empty containers for the per-participant results: one list per parameter
# and image type (food / neutral), plus one for the diagnostic category.
a_food_recovered, a_neutral_recovered = [], []
v_food_recovered, v_neutral_recovered = [], []
t_food_recovered, t_neutral_recovered = [], []
alpha_food_recovered, alpha_neutral_recovered = [], []
pos_alpha_food_recovered, pos_alpha_neutral_recovered = [], []
vec_diag_cat = []

In [151]:
# Count the participants (one entry of vec_subj_idx per participant).
n_subj = len(vec_subj_idx)
print(f'Number of subjects: {n_subj}')

Number of subjects: 216


In [152]:
# Fit one HDDMrl model per participant and store the posterior mean of each
# stimulus-specific parameter.

# Node name in the fitted model -> list that collects its posterior mean.
recovered_by_node = {
    'a(food)': a_food_recovered,
    'a(neutral)': a_neutral_recovered,
    'v(food)': v_food_recovered,
    'v(neutral)': v_neutral_recovered,
    't(food)': t_food_recovered,
    't(neutral)': t_neutral_recovered,
    'alpha(food)': alpha_food_recovered,
    'alpha(neutral)': alpha_neutral_recovered,
    'pos_alpha(food)': pos_alpha_food_recovered,
    'pos_alpha(neutral)': pos_alpha_neutral_recovered,
}

# Empty the result lists in place so that re-running this cell does not
# append a second copy of every participant.
for recovered in recovered_by_node.values():
    recovered.clear()
vec_diag_cat.clear()

# Iterate over the indices actually present in the data (not range(n_subj)):
# this keeps the results aligned with vec_subj_idx / vec_subj_code and never
# selects an empty table if the indices are not exactly 0..n_subj-1.
for subj in vec_subj_idx:
    one_subj = data[data['subj_idx'] == subj]
    mod = hddm.HDDMrl(
        one_subj,
        depends_on = {'a':['stim'],'v':['stim'],'t':['stim'],'z':['stim'],'alpha':['stim'],'pos_alpha':['stim']},
        dual = True, p_outlier = 0.05
    )
    mod.find_starting_values()
    # NOTE(review): every fit uses the same dbname, so presumably only the
    # traces of the last participant remain in 'traces.db' -- confirm that
    # the stored traces are not needed afterwards.
    mod.sample(2500, burn=500, dbname='traces.db', db='pickle')
    # Posterior mean of each parameter node, appended to its list.
    for node_name, recovered in recovered_by_node.items():
        recovered.append(mod.nodes_db.node[node_name].trace().mean())
    # Diagnostic category of this participant (first distinct value).
    dc = one_subj['diag_cat'].unique()
    vec_diag_cat.append(str(dc[0]))



-244.1821960877768
-244.17352477213333
 [-----------------100%-----------------] 2500 of 2500 complete in 18.5 sec-150.13821196778247
-150.13767033271742
 [-----------------100%-----------------] 2500 of 2500 complete in 18.2 sec-329.96491166645916
-329.8874237420367
 [-----------------100%-----------------] 2500 of 2500 complete in 18.0 sec-133.97157481004217
-133.9687520446945
 [-----------------100%-----------------] 2500 of 2500 complete in 16.3 sec-258.46462072253416
-258.4629135413619
 [-----------------100%-----------------] 2500 of 2500 complete in 21.2 sec-157.78365018504627
-157.78309492459164
 [-----------------100%-----------------] 2500 of 2500 complete in 17.0 sec-116.77504623562204
-116.77369241844168
 [-----------------100%-----------------] 2500 of 2500 complete in 17.0 sec-126.76752345203644
-126.76742447194862
 [-----------------100%-----------------] 2500 of 2500 complete in 17.3 sec-254.90031142139603
-254.89875941936668
 [-----------------100%-----------------] 25

In [153]:
# Create dataframe with the recovered parameters: one row per participant,
# parameter columns first, then the participant identifiers and category.
param_columns = dict(
    a_food=a_food_recovered,
    a_neutral=a_neutral_recovered,
    v_food=v_food_recovered,
    v_neutral=v_neutral_recovered,
    t_food=t_food_recovered,
    t_neutral=t_neutral_recovered,
    alpha_food=alpha_food_recovered,
    alpha_neutral=alpha_neutral_recovered,
    pos_alpha_food=pos_alpha_food_recovered,
    pos_alpha_neutral=pos_alpha_neutral_recovered,
    subj_idx=vec_subj_idx,
    subj_code=vec_subj_code,
    diag_cat=vec_diag_cat,
)
params_df = pd.DataFrame(param_columns)
print(params_df)

       a_food  a_neutral    v_food  v_neutral    t_food  t_neutral  \
0    1.257562   1.129257  0.032940   1.037706  0.090448   0.100544   
1    1.653434   1.097017  0.606774   3.814399  0.095269   0.638446   
2    1.324968   1.673929  2.433648   0.589007  0.100739   0.114788   
3    1.463655   1.061821  0.993632   3.124451  0.126467   0.266223   
4    1.378611   1.274620  3.890370   0.281824  0.186096   0.154264   
..        ...        ...       ...        ...       ...        ...   
211  1.012282   1.307644  0.606995  -0.373099  0.124109   0.087485   
212  1.579790   1.780790  0.580838   0.927793  0.072713   0.162751   
213  1.433782   1.071424  1.718586   2.506123  0.153711   0.280686   
214  1.519129   1.062500  2.147193   3.269255  0.111875   0.135190   
215  1.421864   1.224218  3.056834   0.697549  0.132257   0.136503   

     alpha_food  alpha_neutral  pos_alpha_food  pos_alpha_neutral  subj_idx  \
0     -1.070082      -1.607288       -1.274339          -1.327025         0   
1

In [154]:
# Write recovered parameters to file.
# NOTE(review): the file name is misspelled ("invidivual"); it is kept
# unchanged because other scripts may read the file under this name.
params_file = 'params_invidivual_subjects.csv'
params_df.to_csv(params_file)

In [3]:
 mod = hddm.HDDMrl(
        one_subj,
        depends_on = {'a':['stim'],'v':['stim'],'t':['stim'],'z':['stim'],'alpha':['stim'],'pos_alpha':['stim']},
        dual = True, p_outlier = 0.05
    )
mod.find_starting_values()
    mod.sample(2500, burn=500, dbname='traces.db', db='pickle')

IndentationError: unexpected indent (<ipython-input-3-626084587048>, line 6)

In [33]:
# Regression specification for alpha: no intercept, stimulus type coded
# with 'neutral' as the treatment reference level, identity link.
# NOTE(review): the variable is called v_reg although the formula models
# alpha; the name is kept because other cells may refer to it.
alpha_formula = "alpha ~ 0 + C(stim, Treatment('neutral'))"
v_reg = dict(model=alpha_formula, link_func=lambda x: x)

# Regression model for the participant currently held in `one_subj`.
m1 = hddm.HDDMrlRegressor(
    one_subj,
    v_reg,
    # Same value as the original ('alpha'): parentheses without a trailing
    # comma do not create a tuple, so this has always been a plain string.
    include='alpha',
    depends_on={'a': ['stim']},
    p_outlier=0.05,
    group_only_regressors=False,
    keep_regressor_trace=True,
)

Adding these covariates:
["alpha_C(stim, Treatment('neutral'))[food]", "alpha_C(stim, Treatment('neutral'))[neutral]"]


In [34]:
# Find starting values for m1, then draw 150 samples with burn=5, storing
# the traces in the pickle database 'test.db'.
m1.find_starting_values()
m1.sample(150, burn=5, dbname='test.db', db='pickle') # 54392.1 sec -- NOTE(review): presumably the run time of an earlier, longer run; confirm


-339.99510313875214
-339.93070785317195
 [-                 4%                  ] 6 of 150 complete in 33.2 secHalting at iteration  5  of  150
Could not generate output statistics for t
Could not generate output statistics for alpha_C(stim, Treatment('neutral'))[neutral]
Could not generate output statistics for v
Could not generate output statistics for a(food)
Could not generate output statistics for alpha_C(stim, Treatment('neutral'))[food]
Could not generate output statistics for a(neutral)
Could not generate output statistics for alpha_reg


TypeError: 'NoneType' object is not subscriptable

In [32]:
# Print the summary statistics of the parameters of m1.
# NOTE(review): the saved output lists v_C(...) terms, whereas the formula
# that defines m1 in this notebook models alpha -- the output presumably comes
# from an earlier version of the model; re-run after refitting.
m1.print_stats()

                                              mean         std      2.5q        25q       50q       75q     97.5q       mc err
a(food)                                    1.58877   0.0500233   1.49867    1.55178   1.59067   1.62135   1.68835   0.00485566
a(neutral)                                 2.52403    0.127537    2.2763    2.44729   2.52772   2.58527   2.80784    0.0134711
t                                         0.126701  0.00888257  0.107842   0.121118  0.128043  0.133218  0.140855  0.000856489
alpha(food)                                -36.683     26.4364  -91.8896   -49.8777  -33.1529  -15.3148  -3.21987      2.68174
alpha(neutral)                            0.975944    0.344642  0.364102   0.722661  0.955588   1.20505   1.72866    0.0303933
v_C(stim, Treatment('neutral'))[food]      1.88889     2.86535  -4.03364 -0.0975124   1.88223   3.83527   8.07676     0.277043
v_C(stim, Treatment('neutral'))[neutral]   2.11223    0.196189   1.72033    1.96735   2.11133   2.23776   2.497