In [1]:
'''

EATING DISORDERS MONTECATINI

Purpose: Model comparison (HDDMrl) for the PRL data as a function of diagnostic category and stimulus.

(1) diag_cat (diagnostic category) 

- AN     anorexia, 
- AN-R   anorexia in remission, 
- BN     bulimia, 
- BN-R   bulimia in remission, 
- RI     university students at risk according to the EAT-26,
- HC     university students not at risk according to the EAT-26.

(2) stim (stimulus)

- food,
- neutral.

Written by Corrado Caudek

'''


import datetime
# Stamp the notebook with the wall-clock time at which this cell ran.
now = datetime.datetime.now()
print("Current date and time : ", now.strftime("%Y-%m-%d %H:%M:%S"), sep="\n")


Current date and time : 
2022-06-17 06:51:28


In [2]:
# Import modules

# warning settings
import hddm
from statsmodels.distributions.empirical_distribution import ECDF
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pymc.progressbar as pbar
import pymc
import pathlib
import kabuki
from kabuki.utils import concat_models
from kabuki.analyze import check_geweke
from kabuki.analyze import gelman_rubin
from patsy import dmatrix  # for generation of (regression) design matrices
import pickle
import numpy as np
from pandas import Series  # to manipulate data-frames generated by hddm
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

# Data management
pd.options.display.max_colwidth = 100

# Plotting
%matplotlib inline

sns.set()  # for plot styling
plt.style.use('classic')

# HDDM
print("The hddm version is", hddm.__version__)


  from .autonotebook import tqdm as notebook_tqdm


The hddm version is 0.9.7


In [3]:
# custom save/load fns
def savePatch(self, fname):
    """Pickle this object to ``fname``.

    Written to be bound as a method, so ``self`` is the object being saved.
    The target is opened in binary write mode, which overwrites any
    existing file at that path.

    Parameters
    ----------
    fname : path of the pickle file to write.
    """
    with open(fname, 'wb') as sink:
        pickle.Pickler(sink).dump(self)


# Attach savePatch to the hddm.HDDM class so it can be called as a method
# on a model object: model.savePatch(fname).
hddm.HDDM.savePatch = savePatch


def load1(fname):
    """Load and return the object pickled in ``fname``.

    Counterpart of ``savePatch``: reads the file in binary mode and
    unpickles its content.

    Parameters
    ----------
    fname : path of the pickle file to read.

    Returns
    -------
    The unpickled object.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while unpickling; only use
    this on files you created yourself or otherwise trust.
    """
    # The context manager closes the file handle deterministically; the
    # previous ``pickle.load(open(...))`` form never closed it explicitly.
    with open(fname, 'rb') as f:
        model = pickle.load(f)
    return model


# Expose load1 on the hddm module namespace so it can be called as
# hddm.load1(fname).
hddm.load1 = load1


In [5]:
# Load the accuracy-coded data: the 'response' column holds accuracy
# (1 = correct trial, 0 = error trial). Original author's flag: CHANGE!!!
# NOTE(review): the path below is absolute and machine-specific.
data = hddm.load_csv(
    '/Users/corrado/Dropbox/in_progress/01_TODO_ED_MONTECATINI/data/processed/prl/input_for_hddmrl/hddm_input_20220603v2.csv'
)
print('There are %d participants' % (data['subj_idx'].nunique(),))

There are 310 participants


In [6]:
# Preview the first rows to check that the expected columns were loaded.
data.head()

Unnamed: 0,subj_idx,response,stim,rt,trial,split_by,feedback,diag_cat,subj_code,q_init
0,1,0,food,0.979,1,0,0,AN,ca_po_2002_05_25_700_f,0.5
1,1,0,neutral,1.553,1,1,0,AN,ca_po_2002_05_25_700_f,0.5
2,1,1,food,1.939,2,0,0,AN,ca_po_2002_05_25_700_f,0.5
3,1,1,neutral,0.35,2,1,1,AN,ca_po_2002_05_25_700_f,0.5
4,1,0,food,0.768,3,0,0,AN,ca_po_2002_05_25_700_f,0.5


## Model 1

In [7]:
# Model 1 (baseline): no depends_on, and neither bias nor dual is passed,
# so both stay at the library defaults.
m1 = hddm.HDDMrl(
    data,
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
)


No model attribute --> setting up standard HDDM
Set model to ddm


In [8]:
# Ask HDDM to pick starting values for m1; run once, ahead of m1.sample().
m1.find_starting_values()


  tmp2 = (x - v) * (fx - fw)


In [9]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm1.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm1.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m1.sample(2500, burn=500, dbname='models/ddm1.db', db='pickle')


 [-----------------100%-----------------] 2501 of 2500 complete in 2732.4 sec

<pymc.MCMC.MCMC at 0x7f993a28fe20>

In [10]:
# DIC of Model 1; value from the recorded run: 103209.264137
print("m1 DIC: %f" % (m1.dic,))

m1 DIC: 103209.264137


## Model 2

In [11]:
# Model 2: as Model 1 plus dual=True; still no depends_on and no bias.
m2 = hddm.HDDMrl(
    data,
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
    dual=True,  # separate learning rates for pos/neg feedbacks
)

No model attribute --> setting up standard HDDM
Set model to ddm


In [12]:
# Ask HDDM to pick starting values for m2; run once, ahead of m2.sample().
m2.find_starting_values()

  tmp2 = (x - v) * (fx - fw)


In [13]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm2.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm2.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m2.sample(2500, burn=500, dbname='models/ddm2.db', db='pickle')

 [-----------------100%-----------------] 2501 of 2500 complete in 3759.9 sec

<pymc.MCMC.MCMC at 0x7f993effcbe0>

In [14]:
# DIC of Model 2; value from the recorded run: 101590.157114
print("m2 DIC: %f" % (m2.dic,))

m2 DIC: 101590.157114


## Model 3

In [16]:
# Model 3: both learning-rate parameters depend on diag_cat only;
# a, v and t have no depends_on entry, and bias is not passed.
m3 = hddm.HDDMrl(
    data,
    depends_on={
        param: ['diag_cat'] for param in ('alpha', 'pos_alpha')
    },
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
    dual=True,  # separate learning rates for pos/neg feedbacks
)

No model attribute --> setting up standard HDDM
Set model to ddm


In [17]:
# Ask HDDM to pick starting values for m3; run once, ahead of m3.sample().
m3.find_starting_values()

  tmp2 = (x - v) * (fx - fw)


In [18]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm3.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm3.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m3.sample(2500, burn=500, dbname='models/ddm3.db', db='pickle')

 [-----------------100%-----------------] 2501 of 2500 complete in 3727.9 sec

<pymc.MCMC.MCMC at 0x7f9953d33490>

In [19]:
# DIC of Model 3; value from the recorded run: 101613.876788
print("m3 DIC: %f" % (m3.dic,))

m3 DIC: 101613.876788


## Model 4

In [20]:
# Model 4: both learning-rate parameters depend on diag_cat and stim;
# a, v and t have no depends_on entry, and bias is not passed.
m4 = hddm.HDDMrl(
    data,
    depends_on={
        param: ['diag_cat', 'stim'] for param in ('alpha', 'pos_alpha')
    },
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
    dual=True,  # separate learning rates for pos/neg feedbacks
)

No model attribute --> setting up standard HDDM
Set model to ddm


In [21]:
# Ask HDDM to pick starting values for m4; run once, ahead of m4.sample().
m4.find_starting_values()

  tmp2 = (x - v) * (fx - fw)


In [22]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm4.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm4.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m4.sample(2500, burn=500, dbname='models/ddm4.db', db='pickle')

 [-----------------100%-----------------] 2501 of 2500 complete in 4779.1 sec

<pymc.MCMC.MCMC at 0x7f992b2b3820>

In [23]:
# DIC of Model 4; value from the recorded run: 99133.675024
print("m4 DIC: %f" % (m4.dic,))

m4 DIC: 99133.675024


## Model 5

In [24]:
# Model 5: a and both learning-rate parameters depend on diag_cat and stim;
# v and t have no depends_on entry, and bias is not passed.
m5 = hddm.HDDMrl(
    data,
    depends_on={
        param: ['diag_cat', 'stim'] for param in ('a', 'alpha', 'pos_alpha')
    },
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
    dual=True,  # separate learning rates for pos/neg feedbacks
)

No model attribute --> setting up standard HDDM
Set model to ddm


In [25]:
# Ask HDDM to pick starting values for m5; run once, ahead of m5.sample().
m5.find_starting_values()

  tmp2 = (x - v) * (fx - fw)


In [26]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm5.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm5.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m5.sample(2500, burn=500, dbname='models/ddm5.db', db='pickle')

 [-----------------100%-----------------] 2501 of 2500 complete in 5068.5 sec

<pymc.MCMC.MCMC at 0x7f9934908a30>

In [27]:
# DIC of Model 5; value from the recorded run: 96150.581485
print("m5 DIC: %f" % (m5.dic,))

m5 DIC: 96150.581485


## Model 6

In [28]:
# Model 6: a, v and both learning-rate parameters depend on diag_cat and
# stim; t has no depends_on entry, and bias is not passed.
m6 = hddm.HDDMrl(
    data,
    depends_on={
        param: ['diag_cat', 'stim']
        for param in ('a', 'v', 'alpha', 'pos_alpha')
    },
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
    dual=True,  # separate learning rates for pos/neg feedbacks
)

No model attribute --> setting up standard HDDM
Set model to ddm


In [29]:
# Ask HDDM to pick starting values for m6; run once, ahead of m6.sample().
m6.find_starting_values()

  tmp2 = (x - v) * (fx - fw)


In [30]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm6.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm6.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m6.sample(2500, burn=500, dbname='models/ddm6.db', db='pickle')

 [-----------------100%-----------------] 2501 of 2500 complete in 5038.4 sec

<pymc.MCMC.MCMC at 0x7f991e2692b0>

In [31]:
# DIC of Model 6; value from the recorded run: 95434.069822
print("m6 DIC: %f" % (m6.dic,))


m6 DIC: 95434.069822


## Model 7

In [32]:
# Model 7: a, v, t and both learning-rate parameters all depend on
# diag_cat and stim; bias is not passed.
m7 = hddm.HDDMrl(
    data,
    depends_on={
        param: ['diag_cat', 'stim']
        for param in ('a', 'v', 't', 'alpha', 'pos_alpha')
    },
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
    dual=True,  # separate learning rates for pos/neg feedbacks
)

No model attribute --> setting up standard HDDM
Set model to ddm


In [33]:
# Ask HDDM to pick starting values for m7; run once, ahead of m7.sample().
m7.find_starting_values()

  tmp2 = (x - v) * (fx - fw)


In [34]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm7.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm7.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m7.sample(2500, burn=500, dbname='models/ddm7.db', db='pickle')

 [-----------------100%-----------------] 2501 of 2500 complete in 5309.1 sec

<pymc.MCMC.MCMC at 0x7f9906309340>

In [35]:
# DIC of Model 7; value from the recorded run: 92808.856314
print("m7 DIC: %f" % (m7.dic,))


m7 DIC: 92808.856314


## Model 8

In [36]:
# Model 8: same depends_on structure as Model 7 (a, v, t, alpha, pos_alpha
# by diag_cat and stim), with bias=True additionally switched on.
m8 = hddm.HDDMrl(
    data,
    depends_on={
        param: ['diag_cat', 'stim']
        for param in ('a', 'v', 't', 'alpha', 'pos_alpha')
    },
    bias=True,
    informative=True,  # informative priors on ddm params
    p_outlier=0.05,
    dual=True,  # separate learning rates for pos/neg feedbacks
)

No model attribute --> setting up standard HDDM
Set model to ddm


In [37]:
# Ask HDDM to pick starting values for m8; run once, ahead of m8.sample().
m8.find_starting_values()

  tmp2 = (x - v) * (fx - fw)


In [38]:
# Create the output folder before the long run.
# NOTE(review): the pickle backend appears to write the trace file only once
# sampling ends, so a missing 'models/' folder would cost the whole run — confirm.
pathlib.Path('models/ddm8.db').parent.mkdir(parents=True, exist_ok=True)
# 2500 iterations with burn=500; the trace goes to models/ddm8.db through the
# pickle backend. The returned sampler object is the cell's displayed output.
m8.sample(2500, burn=500, dbname='models/ddm8.db', db='pickle')

 [-----------------100%-----------------] 2501 of 2500 complete in 5888.0 sec

<pymc.MCMC.MCMC at 0x7f99065fe5e0>

In [39]:
# DIC of Model 8; value from the recorded run: 93157.610953
print("m8 DIC: %f" % (m8.dic,))

m8 DIC: 93157.610953
