In [3]:
"""

EATING DISORDERS MONTECATINI

Purpose: Obtain the posterior means for the parameters of the HDDMrl model 
         for each participant using "ddm/traces.csv". In the present case, 
         the data of model M8 are used. 
         Obtain subj_code from the file used as input to hddm.
         Create a file with both subj_code and the posterior means of the 
         HDDMrl parameters.

Written by Corrado Caudek (2023-07-06)

"""

'\n\nEATING DISORDERS MONTECATINI\n\nPurpose: Obtain the posterior means for the parameters of the HDDMrl model \n         for each participant using "ddm/traces.csv". In the present case, \n         the data of model M8 are used. \n         Obtain subj_code from the file used as input to hddm.\n         Create a file with both subj_code and the posterior means of the \n         HDDMrl parameters.\n\nWritten by Corrado Caudek (2023-07-06)\n\n'

In [4]:
%matplotlib inline 

import os, time, csv, sys
import glob

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.distributions.empirical_distribution import ECDF
from scipy import stats

import hddm

import pymc
import pymc as pm
import arviz as az

# Data management
pd.options.display.max_colwidth = 100


In [5]:
data = pd.read_csv("ddm/traces.csv")
data = data.drop(data.columns[0], axis=1)
data.head()

Unnamed: 0,a(AN.food),a(AN.neutral),a(HC.food),a(HC.neutral),a(RI.food),a(RI.neutral),a_std,a_subj(AN.food).1,a_subj(AN.food).4,a_subj(AN.food).5,...,pos_alpha_subj(RI.neutral).83,pos_alpha_subj(RI.neutral).86,pos_alpha_subj(RI.neutral).91,pos_alpha_subj(RI.neutral).92,pos_alpha_subj(RI.neutral).93,pos_alpha_subj(RI.neutral).95,pos_alpha_subj(RI.neutral).99,pos_alpha_subj(RI.neutral).111,pos_alpha_subj(RI.neutral).113,pos_alpha_subj(RI.neutral).114
0,1.397416,1.249067,1.351315,1.162854,1.312107,1.260692,0.206479,1.214189,1.385565,1.53588,...,3.753372,1.625114,-0.083374,2.37205,3.923368,-1.683346,-0.087606,-0.038245,3.911917,4.256163
1,1.401915,1.311477,1.300428,1.166193,1.31224,1.343598,0.252398,1.354999,1.379008,1.574464,...,3.500831,3.848192,-0.825695,6.776927,0.970095,-3.189411,0.343492,-0.709189,2.600073,0.667245
2,1.43505,1.252805,1.298151,1.170947,1.302349,1.214292,0.232662,1.268966,1.334123,1.507743,...,5.65291,2.804052,-0.6136,6.341303,4.405931,-3.249074,1.57162,-0.932823,1.863105,0.756014
3,1.456558,1.304838,1.320196,1.253136,1.335521,1.267989,0.213264,1.27174,1.358004,1.581126,...,3.438743,4.626994,-0.265223,7.773233,1.85118,-5.162659,1.643261,-0.677264,1.88048,0.849897
4,1.418766,1.275415,1.296918,1.206623,1.317577,1.245782,0.22641,1.263081,1.293899,1.573345,...,4.118109,3.977853,-0.48586,5.050355,3.727054,-3.241631,-0.346091,-0.057078,2.042557,-0.85406


In [6]:
mean_values = data.mean()

# Create a new DataFrame 'df' with column names and mean values
df = pd.DataFrame({'params': mean_values.index, 'val': mean_values.values})
df["params"] = df["params"].astype(str)  # Convert the column to string type
# Filter rows where the first column contains the string 'subj'
df = df[df["params"].str.contains("subj")]
df["params"] = df["params"].astype(str)  # Convert the 'params' column to string type
df["subj_idx"] = df["params"].apply(lambda x: x.split(".")[-1])
df["par"] = df["params"].str.split("_sub").str[0]
df["grpstim"] = df["params"].str.extract(r"\((.*?)\)\.")
df[["diag_cat", "stim"]] = df["grpstim"].str.split(".", expand=True)
df = df.drop("grpstim", axis=1)
df["subj_idx"] = pd.to_numeric(df["subj_idx"])
df.head()

Unnamed: 0,params,val,subj_idx,par,diag_cat,stim
7,a_subj(AN.food).1,1.270092,1,a,AN,food
8,a_subj(AN.food).4,1.352607,4,a,AN,food
9,a_subj(AN.food).5,1.540525,5,a,AN,food
10,a_subj(AN.food).7,1.145499,7,a,AN,food
11,a_subj(AN.food).8,1.638459,8,a,AN,food


In [9]:
working_directory = "/Users/corrado/_repositories/eating_disorders_23/"

codes_tbl_path = os.path.join(
    working_directory,
    "data",
    "processed",
    "prl",
    "input_for_hddmrl",
    "three_groups",
    "ed_prl_data.csv",
)

codes_tbl = pd.read_csv(
    codes_tbl_path
)
codes_tbl = codes_tbl[["subj_idx", "subj_code"]]
codes_tbl = codes_tbl.drop_duplicates()
codes_tbl["subj_idx"] = pd.to_numeric(codes_tbl["subj_idx"])
codes_tbl.head()

Unnamed: 0,subj_idx,subj_code
0,1,ca_po_2002_05_25_700_f
320,2,gi_ba_2008_01_31_376_f
640,3,ca_fa_1996_03_26_092_f
960,4,em_or_2003_01_02_101_f
1280,5,au_ru_1998_09_21_806_f


In [10]:
merged_df = pd.merge(df, codes_tbl, on="subj_idx")
merged_df.shape

(1120, 7)

In [11]:
merged_df.head()

Unnamed: 0,params,val,subj_idx,par,diag_cat,stim,subj_code
0,a_subj(AN.food).1,1.270092,1,a,AN,food,ca_po_2002_05_25_700_f
1,a_subj(AN.neutral).1,1.131988,1,a,AN,neutral,ca_po_2002_05_25_700_f
2,v_subj(AN.food).1,0.799608,1,v,AN,food,ca_po_2002_05_25_700_f
3,v_subj(AN.neutral).1,0.900103,1,v,AN,neutral,ca_po_2002_05_25_700_f
4,t_subj(AN.food).1,0.088883,1,t,AN,food,ca_po_2002_05_25_700_f


## Cohen's d

In [12]:
mean_df = merged_df.groupby(["par", "diag_cat", "stim"])["val"].mean().reset_index()
print(mean_df)

          par diag_cat     stim       val
0           a       AN     food  1.447020
1           a       AN  neutral  1.270594
2           a       HC     food  1.310793
3           a       HC  neutral  1.213981
4           a       RI     food  1.333627
5           a       RI  neutral  1.238774
6       alpha       AN     food  0.761158
7       alpha       AN  neutral  1.902642
8       alpha       HC     food  3.580999
9       alpha       HC  neutral  2.885617
10      alpha       RI     food  3.044799
11      alpha       RI  neutral  3.008047
12  pos_alpha       AN     food -1.838255
13  pos_alpha       AN  neutral  1.040764
14  pos_alpha       HC     food  1.352460
15  pos_alpha       HC  neutral  1.216924
16  pos_alpha       RI     food  0.762479
17  pos_alpha       RI  neutral  1.621782
18          t       AN     food  0.179931
19          t       AN  neutral  0.205113
20          t       HC     food  0.161610
21          t       HC  neutral  0.176945
22          t       RI     food  0

In [14]:
mean_df = merged_df.groupby(["par", "diag_cat", "stim"])["val"].std().reset_index()
print(mean_df)

          par diag_cat     stim       val
0           a       AN     food  0.233203
1           a       AN  neutral  0.205229
2           a       HC     food  0.207748
3           a       HC  neutral  0.201013
4           a       RI     food  0.264549
5           a       RI  neutral  0.198392
6       alpha       AN     food  2.851639
7       alpha       AN  neutral  2.402888
8       alpha       HC     food  2.127621
9       alpha       HC  neutral  2.525458
10      alpha       RI     food  2.519546
11      alpha       RI  neutral  2.277227
12  pos_alpha       AN     food  2.422659
13  pos_alpha       AN  neutral  2.351581
14  pos_alpha       HC     food  1.880636
15  pos_alpha       HC  neutral  1.928992
16  pos_alpha       RI     food  1.847984
17  pos_alpha       RI  neutral  2.080665
18          t       AN     food  0.081097
19          t       AN  neutral  0.115264
20          t       HC     food  0.051896
21          t       HC  neutral  0.057594
22          t       RI     food  0

Cohen's d for alpha_pos AN food vs neutral:

In [29]:
(-1.838 - 1.04) / np.sqrt((2.42**2 + 2.35**2)/2)

-1.2065786797011757

Cohen's d for alpha_pos AN food vs neutral:

In [30]:
(0.761158 - 1.902642) / np.sqrt((2.851639**2 + 2.402**2)/2)

-0.43296699468223265

## Save csv file

In [None]:
saved_file_path = os.path.join(
    working_directory,
    "data",
    "processed",
    "prl",
    "subj_hddm_params",
    "subj_code_hddm_params.csv",
)

merged_df.to_csv(
    saved_file_path,
    index=False,
)

In [19]:
d = pd.read_csv("ddm/ddm_grptrcs.csv")
d = d.drop(d.columns[0], axis=1)
d.head()

Unnamed: 0,a(AN.food),a(AN.neutral),a(HC.food),a(HC.neutral),a(RI.food),a(RI.neutral),a_std,v(AN.food),v(AN.neutral),v(HC.food),...,alpha(RI.food),alpha(RI.neutral),alpha_std,pos_alpha(AN.food),pos_alpha(AN.neutral),pos_alpha(HC.food),pos_alpha(HC.neutral),pos_alpha(RI.food),pos_alpha(RI.neutral),pos_alpha_std
0,1.397416,1.249067,1.351315,1.162854,1.312107,1.260692,0.206479,1.182691,1.613033,1.562967,...,3.992138,4.22145,4.444809,-0.993066,0.94385,1.149061,0.571607,-0.637756,1.24104,2.48921
1,1.401915,1.311477,1.300428,1.166193,1.31224,1.343598,0.252398,1.376908,2.005425,1.649774,...,4.281927,3.906541,4.465355,-1.452688,1.755231,0.800335,1.689709,0.098029,1.184387,2.586177
2,1.43505,1.252805,1.298151,1.170947,1.302349,1.214292,0.232662,2.022165,1.635576,1.768018,...,4.083619,2.763721,3.999259,-1.033294,0.338539,1.3342,0.910355,0.161999,1.16803,2.668255
3,1.456558,1.304838,1.320196,1.253136,1.335521,1.267989,0.213264,1.894378,1.132127,1.580206,...,4.649658,2.727805,4.36725,-1.857032,0.069357,1.077413,1.329339,0.00657,0.880617,2.673449
4,1.418766,1.275415,1.296918,1.206623,1.317577,1.245782,0.22641,2.019168,1.46069,1.631391,...,4.148326,2.255316,4.304327,-1.67001,1.169318,1.241614,0.306833,0.211832,0.453569,2.515282


In [20]:
d.columns

Index(['a(AN.food)', 'a(AN.neutral)', 'a(HC.food)', 'a(HC.neutral)',
       'a(RI.food)', 'a(RI.neutral)', 'a_std', 'v(AN.food)', 'v(AN.neutral)',
       'v(HC.food)', 'v(HC.neutral)', 'v(RI.food)', 'v(RI.neutral)', 'v_std',
       't(AN.food)', 't(AN.neutral)', 't(HC.food)', 't(HC.neutral)',
       't(RI.food)', 't(RI.neutral)', 't_std', 'alpha(AN.food)',
       'alpha(AN.neutral)', 'alpha(HC.food)', 'alpha(HC.neutral)',
       'alpha(RI.food)', 'alpha(RI.neutral)', 'alpha_std',
       'pos_alpha(AN.food)', 'pos_alpha(AN.neutral)', 'pos_alpha(HC.food)',
       'pos_alpha(HC.neutral)', 'pos_alpha(RI.food)', 'pos_alpha(RI.neutral)',
       'pos_alpha_std'],
      dtype='object')

In [28]:
np.mean(d["pos_alpha(AN.food)"]) - np.mean(d["pos_alpha(AN.neutral)"]) / np.sqrt(np.std(d["pos_alpha(AN.food)"])**2 + np.std(d["pos_alpha(AN.neutral)"])**2 / 2)

-2.8035784232344225

In [71]:
import pandas as pd
from scipy import stats

def compute_cohens_d_within_subjects(df, stim_col, diag_cat_col, par_col, val_col):
    results = pd.DataFrame(columns=['diag_cat', 'par', 'cohens_d'])

    # Iterate over unique levels of diag_cat
    diag_cat_levels = df[diag_cat_col].unique()
    for diag_cat_level in diag_cat_levels:
        # Filter the DataFrame for the current diag_cat level
        diag_cat_df = df[df[diag_cat_col] == diag_cat_level]

        # Iterate over unique levels of par
        par_levels = diag_cat_df[par_col].unique()
        for par_level in par_levels:
            # Filter the DataFrame for the current par level
            par_df = diag_cat_df[diag_cat_df[par_col] == par_level]

            # Check if both stim levels exist for the current diag_cat and par
            stim_levels = par_df[stim_col].unique()
            if len(stim_levels) != 2:
                continue  # Skip if both stim levels are not present

            # Filter the DataFrame for the two stim levels
            stim_df1 = par_df[par_df[stim_col] == stim_levels[0]]
            stim_df2 = par_df[par_df[stim_col] == stim_levels[1]]

            # Check if the lengths of the two stim levels are equal
            if len(stim_df1) != len(stim_df2):
                continue  # Skip if the lengths are unequal

            # Calculate Cohen's d for the within-subjects comparison
            ttest_result = stats.ttest_rel(stim_df1[val_col], stim_df2[val_col])
            cohens_d = ttest_result.statistic / par_df[val_col].std()

            # Append the results to the DataFrame
            results = results.append({
                'diag_cat': diag_cat_level,
                'par': par_level,
                'cohens_d': cohens_d
            }, ignore_index=True)

    return results


In [72]:
result_df = compute_cohens_d_within_subjects(merged_df, 'stim', 'diag_cat', 'par', 'val')
print(result_df)

  diag_cat        par   cohens_d
0       HC          a  15.740336
1       HC          v  -3.169049
2       HC          t -36.772777
3       HC      alpha   0.719287
4       HC  pos_alpha   0.197083
