In [12]:
"""

EATING DISORDERS MONTECATINI

Purpose: HDDMrl models' comparison for the three-groups (HC, RI, AN) data.

Written by Corrado Caudek (2023-06-30)

"""

import datetime

now = datetime.datetime.now()
print("Current date and time : ")
print(now.strftime("%Y-%m-%d %H:%M:%S"))


Current date and time : 
2023-07-01 15:48:38


This notebook follows the [tutorial](https://hddm.readthedocs.io/en/latest/demo_RLHDDMtutorial.html#checking-results) on the hddm webpage.


In [13]:
%matplotlib inline 

import os, time, csv, sys
import glob

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns

import pymc as pm
import hddm
import kabuki
import arviz as az

import pymc.progressbar as pbar
import pathlib

from kabuki.utils import concat_models
from kabuki.analyze import check_geweke
from kabuki.analyze import gelman_rubin

from patsy import dmatrix  # for generation of (regression) design matrices
import pickle

from tqdm import tqdm

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

# Data management
pd.options.display.max_colwidth = 100

az.style.use("arviz-white")
%config InlineBackend.figure_format='retina'

print("The hddm version is", hddm.__version__)

The hddm version is 0.9.8


In [14]:
# Set display options to show all rows and columns
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Reset display options to their default values
# pd.reset_option('display.max_rows')
# pd.reset_option('display.max_columns')

In [15]:
!pwd

/Users/corrado/_repositories/eating_disorders_23/src/python/PRL/01a_get_hddmrl_params


In [16]:
data = hddm.load_csv(
    "/Users/corrado/_repositories/eating_disorders_23/data/processed/prl/input_for_hddmrl/three_groups/ed_prl_data.csv"
)

In [17]:
print("There are %d participants" % data["subj_idx"].nunique())


There are 116 participants


In [18]:
data["diag_cat"].value_counts()

HC    14400
AN    11040
RI     9760
Name: diag_cat, dtype: int64

In [19]:
data.groupby("diag_cat")["subj_code"].nunique()

diag_cat
AN    37
HC    45
RI    34
Name: subj_code, dtype: int64

Check whether all AN patients have performance above 0.5 for at least one of the two conditions (food, neutral).

In [20]:
# Group the DataFrame and calculate the mean of 'feedback' for each combination of 'subj_code', 'stim', and 'diag_cat'
mean_feedback = data.groupby(["diag_cat", "stim"])["feedback"].mean()
print(mean_feedback)

diag_cat  stim   
AN        food       0.531964
          neutral    0.549816
HC        food       0.563056
          neutral    0.565417
RI        food       0.559091
          neutral    0.573884
Name: feedback, dtype: float64


In [21]:
data.columns

Index(['subj_idx', 'response', 'stim', 'rt', 'trial', 'split_by', 'feedback',
       'diag_cat', 'subj_code', 'q_init'],
      dtype='object')

In [22]:
# Calculate the minimum, maximum, and mean values of 'rt' for each combination of 'diag_cat' and 'stim'
result = data.groupby(["diag_cat", "stim"])["rt"].agg(["min", "max", "mean", "median"])
print(result)

                   min    max      mean  median
diag_cat stim                                  
AN       food     0.15  2.488  0.668403  0.5390
         neutral  0.15  2.462  0.579149  0.4745
HC       food     0.15  2.499  0.584286  0.4545
         neutral  0.15  2.462  0.516753  0.4070
RI       food     0.15  2.493  0.640668  0.4985
         neutral  0.15  2.449  0.533703  0.4160


## Model 1

In [23]:
m1 = hddm.HDDMrl(
    data,
    # bias=True,
    # depends_on={
    #     "a": ["diag_cat", "stim"],
    #     "v": ["diag_cat", "stim"],
    #     "t": ["diag_cat", "stim"],
    #     "alpha": ["diag_cat", "stim"],
    #     "pos_alpha": ["diag_cat", "stim"],
    # },
    # dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m1.find_starting_values()

m1.sample(2500, burn=500, dbname="models/ddm1.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm


  tmp2 = (x - v) * (fx - fw)


 [-----------------100%-----------------] 2501 of 2500 complete in 965.2 sec

<pymc.MCMC.MCMC at 0x7f85437990d0>

In [24]:
print("m1 DIC: %f" % m1.dic)

m1 DIC: 35409.217461


## Model 2

In [25]:
m2 = hddm.HDDMrl(
    data,
    # bias=True,
    # depends_on={
    #     "a": ["diag_cat", "stim"],
    #     "v": ["diag_cat", "stim"],
    #     "t": ["diag_cat", "stim"],
    #     "alpha": ["diag_cat", "stim"],
    #     "pos_alpha": ["diag_cat", "stim"],
    # },
    dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m2.find_starting_values()

m2.sample(2500, burn=500, dbname="models/ddm2.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm
 [-----------------100%-----------------] 2501 of 2500 complete in 1346.1 sec

<pymc.MCMC.MCMC at 0x7f8545dd9b10>

In [26]:
print("m2 DIC: %f" % m2.dic)

m2 DIC: 34717.401648


## Model 3

In [27]:
m3 = hddm.HDDMrl(
    data,
    # bias=True,
    depends_on={
        #     "a": ["diag_cat", "stim"],
        #     "v": ["diag_cat", "stim"],
        #     "t": ["diag_cat", "stim"],
        "alpha": ["diag_cat"],
        "pos_alpha": ["diag_cat"],
    },
    dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m3.find_starting_values()

m3.sample(2500, burn=500, dbname="models/ddm3.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm
 [-----------------100%-----------------] 2500 of 2500 complete in 1249.3 sec

<pymc.MCMC.MCMC at 0x7f85478ddad0>

In [28]:
print("m3 DIC: %f" % m3.dic)

m3 DIC: 34806.970730


## Model 4

In [29]:
m4 = hddm.HDDMrl(
    data,
    # bias=True,
    depends_on={
        #     "a": ["diag_cat", "stim"],
        #     "v": ["diag_cat", "stim"],
        #     "t": ["diag_cat", "stim"],
        "alpha": ["diag_cat", "stim"],
        "pos_alpha": ["diag_cat", "stim"],
    },
    dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m4.find_starting_values()

m4.sample(2500, burn=500, dbname="models/ddm4.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm
 [-----------------100%-----------------] 2501 of 2500 complete in 1760.0 sec

<pymc.MCMC.MCMC at 0x7f854b523650>

In [30]:
print("m4 DIC: %f" % m4.dic)

m4 DIC: 33369.489305


## Model 5

In [31]:
m5 = hddm.HDDMrl(
    data,
    # bias=True,
    depends_on={
        "a": ["diag_cat", "stim"],
        #     "v": ["diag_cat", "stim"],
        #     "t": ["diag_cat", "stim"],
        "alpha": ["diag_cat", "stim"],
        "pos_alpha": ["diag_cat", "stim"],
    },
    dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m5.find_starting_values()

m5.sample(2500, burn=500, dbname="models/ddm5.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm
 [-----------------100%-----------------] 2501 of 2500 complete in 1976.2 sec

<pymc.MCMC.MCMC at 0x7f854e5a3bd0>

In [32]:
print("m5 DIC: %f" % m5.dic)

m5 DIC: 32077.357044


## Model 6

In [33]:
m6 = hddm.HDDMrl(
    data,
    # bias=True,
    depends_on={
        "a": ["diag_cat", "stim"],
        "v": ["diag_cat", "stim"],
        #     "t": ["diag_cat", "stim"],
        "alpha": ["diag_cat", "stim"],
        "pos_alpha": ["diag_cat", "stim"],
    },
    dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m6.find_starting_values()

m6.sample(2500, burn=500, dbname="models/ddm6.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm
 [-----------------100%-----------------] 2501 of 2500 complete in 1853.3 sec

<pymc.MCMC.MCMC at 0x7f8531b890d0>

In [34]:
print("m6 DIC: %f" % m6.dic)

m6 DIC: 31858.265500


## Model 7

In [35]:
m7 = hddm.HDDMrl(
    data,
    # bias=True,
    depends_on={
        "a": ["diag_cat", "stim"],
        "v": ["diag_cat", "stim"],
        "t": ["diag_cat", "stim"],
        "alpha": ["diag_cat", "stim"],
        "pos_alpha": ["diag_cat", "stim"],
    },
    dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m7.find_starting_values()

m7.sample(2500, burn=500, dbname="models/ddm7.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm
 [-----------------100%-----------------] 2501 of 2500 complete in 1785.9 sec

<pymc.MCMC.MCMC at 0x7f8536a9bad0>

In [36]:
print("m7 DIC: %f" % m7.dic)

m7 DIC: 30812.397933


## Model 8

In [37]:
m8 = hddm.HDDMrl(
    data,
    bias=True,
    depends_on={
        "a": ["diag_cat", "stim"],
        "v": ["diag_cat", "stim"],
        "t": ["diag_cat", "stim"],
        "alpha": ["diag_cat", "stim"],
        "pos_alpha": ["diag_cat", "stim"],
    },
    dual=True,  # separate learning rates for pos/neg feedbacks
    p_outlier=0.05,
    informative=True,  # informative priors on ddm params
    include=["v", "a", "t"],
)

m8.find_starting_values()

m8.sample(2500, burn=500, dbname="models/ddm8.db", db="pickle")

No model attribute --> setting up standard HDDM
Set model to ddm
 [-----------------100%-----------------] 2501 of 2500 complete in 2230.2 sec

<pymc.MCMC.MCMC at 0x7f853a2d46d0>

In [38]:
print("m8 DIC: %f" % m8.dic)

m8 DIC: 30826.391885
