In [1]:
"""
Title: Eating disorders Montecatini 
Author: Corrado Caudek
Description: Obtain the posterior means for the parameters of the HDDMrl model 
    for each participant using "ddm/traces.csv". In the present case, 
    the data of model M8 are used. 
    Obtain subj_code from the file used as input to hddm.
    Create a file with both subj_code and the posterior means of the 
    HDDMrl parameters.
Version History:
- v1.0 (July 6, 2023): Initial version
Contact: corrado.caudek@unifi.it
"""

# Virtual environment: py37_env

import datetime

# Record when this notebook was executed so the saved outputs can be dated.
now = datetime.datetime.now()
print("Current date and time : ")
print(f"{now:%Y-%m-%d %H:%M:%S}")

Current date and time : 
2023-07-07 06:51:05


In [15]:
%matplotlib inline 

import os, time, csv, sys
import glob

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.distributions.empirical_distribution import ECDF
from scipy import stats

import hddm

import pymc
import pymc as pm
import arviz as az

# Display configuration: allow up to 100 characters per column before
# pandas truncates the cell contents in rendered tables.
pd.set_option("display.max_colwidth", 100)


In [3]:
# Load the posterior traces (one row per sample, one column per node).
data = pd.read_csv("ddm/traces.csv")
# Discard the leading column — presumably the row index written out when the
# traces were exported (TODO confirm); it must not enter the column means below.
data = data.drop(columns=data.columns[0])
data.head()

Unnamed: 0,a(AN.food),a(AN.neutral),a(HC.food),a(HC.neutral),a(RI.food),a(RI.neutral),a_std,a_subj(AN.food).1,a_subj(AN.food).4,a_subj(AN.food).5,...,pos_alpha_subj(RI.neutral).83,pos_alpha_subj(RI.neutral).86,pos_alpha_subj(RI.neutral).91,pos_alpha_subj(RI.neutral).92,pos_alpha_subj(RI.neutral).93,pos_alpha_subj(RI.neutral).95,pos_alpha_subj(RI.neutral).99,pos_alpha_subj(RI.neutral).111,pos_alpha_subj(RI.neutral).113,pos_alpha_subj(RI.neutral).114
0,1.397416,1.249067,1.351315,1.162854,1.312107,1.260692,0.206479,1.214189,1.385565,1.53588,...,3.753372,1.625114,-0.083374,2.37205,3.923368,-1.683346,-0.087606,-0.038245,3.911917,4.256163
1,1.401915,1.311477,1.300428,1.166193,1.31224,1.343598,0.252398,1.354999,1.379008,1.574464,...,3.500831,3.848192,-0.825695,6.776927,0.970095,-3.189411,0.343492,-0.709189,2.600073,0.667245
2,1.43505,1.252805,1.298151,1.170947,1.302349,1.214292,0.232662,1.268966,1.334123,1.507743,...,5.65291,2.804052,-0.6136,6.341303,4.405931,-3.249074,1.57162,-0.932823,1.863105,0.756014
3,1.456558,1.304838,1.320196,1.253136,1.335521,1.267989,0.213264,1.27174,1.358004,1.581126,...,3.438743,4.626994,-0.265223,7.773233,1.85118,-5.162659,1.643261,-0.677264,1.88048,0.849897
4,1.418766,1.275415,1.296918,1.206623,1.317577,1.245782,0.22641,1.263081,1.293899,1.573345,...,4.118109,3.977853,-0.48586,5.050355,3.727054,-3.241631,-0.346091,-0.057078,2.042557,-0.85406


In [4]:
# Posterior mean of every traced node (column-wise mean over the samples).
# NOTE(review): the means are taken on whatever scale traces.csv stores; the
# alpha / pos_alpha values look logit-transformed (outside [0, 1]) — confirm
# whether an inverse-logit is needed downstream.
mean_values = data.mean()

# Long table: one row per node name with its posterior mean.
df = pd.DataFrame({"params": mean_values.index, "val": mean_values.values})
df["params"] = df["params"].astype(str)

# Keep only subject-level nodes, e.g. "a_subj(AN.food).1".
# .copy() makes the filtered frame independent, so the column assignments
# below do not raise SettingWithCopyWarning.
df = df[df["params"].str.contains("subj", regex=False)].copy()

# The subject index is the token after the last "." in the node name.
df["subj_idx"] = pd.to_numeric(df["params"].str.rsplit(".", n=1).str[-1])
# The parameter name is everything before "_sub" (e.g. "a", "v", "pos_alpha").
df["par"] = df["params"].str.split("_sub").str[0]
# The text inside the parentheses is "<group>.<stimulus>", e.g. "AN.food".
grpstim = df["params"].str.extract(r"\((.*?)\)\.", expand=False)
df[["diag_cat", "stim"]] = grpstim.str.split(".", expand=True)
df.head()

Unnamed: 0,params,val,subj_idx,par,diag_cat,stim
7,a_subj(AN.food).1,1.270092,1,a,AN,food
8,a_subj(AN.food).4,1.352607,4,a,AN,food
9,a_subj(AN.food).5,1.540525,5,a,AN,food
10,a_subj(AN.food).7,1.145499,7,a,AN,food
11,a_subj(AN.food).8,1.638459,8,a,AN,food


In [5]:
# Project root. Defaults to the author's local checkout; set the
# ED23_PROJECT_DIR environment variable to run the notebook elsewhere.
working_directory = os.environ.get(
    "ED23_PROJECT_DIR", "/Users/corrado/_repositories/eating_disorders_23/"
)

# File that was used as input to hddm; it carries both the numeric subj_idx
# and the participant's subj_code.
codes_tbl_path = os.path.join(
    working_directory,
    "data",
    "processed",
    "prl",
    "input_for_hddmrl",
    "three_groups",
    "ed_prl_data.csv",
)

# Read only the two identifier columns, collapse the repeated rows to one row
# per (subj_idx, subj_code) pair, and make sure subj_idx is numeric so that it
# can be used as a merge key. The original row labels are preserved.
codes_tbl = (
    pd.read_csv(codes_tbl_path, usecols=["subj_idx", "subj_code"])[
        ["subj_idx", "subj_code"]
    ]
    .drop_duplicates()
    .assign(subj_idx=lambda tbl: pd.to_numeric(tbl["subj_idx"]))
)
codes_tbl.head()

Unnamed: 0,subj_idx,subj_code
0,1,ca_po_2002_05_25_700_f
320,2,gi_ba_2008_01_31_376_f
640,3,ca_fa_1996_03_26_092_f
960,4,em_or_2003_01_02_101_f
1280,5,au_ru_1998_09_21_806_f


In [6]:
# An inner merge silently drops parameter rows whose subj_idx has no code,
# so fail loudly instead of writing an incomplete table.
unmatched_idx = sorted(set(df["subj_idx"]) - set(codes_tbl["subj_idx"]))
if unmatched_idx:
    raise ValueError(f"No subj_code found for subj_idx: {unmatched_idx}")

# validate="many_to_one" raises if a subj_idx maps to more than one subj_code,
# which would otherwise duplicate parameter rows without any warning.
merged_df = pd.merge(
    df, codes_tbl, on="subj_idx", how="inner", validate="many_to_one"
)
merged_df.shape

(1120, 7)

In [7]:
# Preview the first rows of the merged table.
merged_df.head(5)

Unnamed: 0,params,val,subj_idx,par,diag_cat,stim,subj_code
0,a_subj(AN.food).1,1.270092,1,a,AN,food,ca_po_2002_05_25_700_f
1,a_subj(AN.neutral).1,1.131988,1,a,AN,neutral,ca_po_2002_05_25_700_f
2,v_subj(AN.food).1,0.799608,1,v,AN,food,ca_po_2002_05_25_700_f
3,v_subj(AN.neutral).1,0.900103,1,v,AN,neutral,ca_po_2002_05_25_700_f
4,t_subj(AN.food).1,0.088883,1,t,AN,food,ca_po_2002_05_25_700_f


## Save CSV file

In [None]:
# Destination folder for the per-participant posterior means, under the
# project root.
output_dir = os.path.join(
    working_directory,
    "data",
    "processed",
    "prl",
    "subj_hddm_params",
)
# to_csv does not create missing directories, so make sure the folder exists.
os.makedirs(output_dir, exist_ok=True)

saved_file_path = os.path.join(output_dir, "subj_code_hddm_params.csv")

# index=False: the row labels carry no information and are not written.
merged_df.to_csv(
    saved_file_path,
    index=False,
)