# Subject comparisons

In [1]:
import pandas as pd
import numpy as np 
import glob
from os.path import join 
import seaborn as sns
import matplotlib.pyplot as plt
import sys
from statsmodels.stats.multitest import fdrcorrection
from scipy.stats import ttest_ind

# On macOS ("darwin") render figure text through LaTeX in a Palatino serif font.
if sys.platform == 'darwin':
    from matplotlib import rc

    rc("font", family="serif", serif=["Palatino"])
    rc("text", usetex=True)


# Root directory of the raw IMAGEN data; questionnaire paths are joined onto it.
DATA_DIR = "/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7"

In [2]:
# Subject table. ID is read as a string so that it can be joined with the
# string IDs of the questionnaire tables.
df = pd.read_csv(
    "fs-stats_FU2-FU2_n789.csv",
    dtype={"ID": str},
)

In [3]:
# Split the subject table by label value.
# NOTE: df0 / df1 are reassigned further down, after the questionnaire data
# has been merged in.
df1 = df[df["label"] == 1]
df0 = df[df["label"] == 0]

In [4]:
def parse_table(files):
    """Load the two digests of one questionnaire and stack them into one table.

    Parameters
    ----------
    files : sequence of str
        Two CSV paths relative to ``DATA_DIR``. ``files[0]`` is tagged as
        time point ``"BL"`` and ``files[1]`` as time point ``"FU2"``.

    Returns
    -------
    pandas.DataFrame
        The rows of both files, with an added ``tp`` column holding the time
        point tag and the ``"User code"`` column renamed to ``ID``, from which
        the ``-C`` and ``-I`` substrings have been removed.

    Raises
    ------
    IndexError
        If ``files`` holds fewer than two paths.
    """
    bl_path, fu2_path = files[0], files[1]
    frames = [
        pd.read_csv(join(DATA_DIR, bl_path)).assign(tp="BL"),
        pd.read_csv(join(DATA_DIR, fu2_path)).assign(tp="FU2"),
    ]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    # As with append, every file keeps its own row index.
    table = pd.concat(frames).rename(columns={"User code": "ID"})
    # regex=False pins literal replacement; the default of `regex` differs
    # between pandas versions.
    table["ID"] = (
        table["ID"]
        .str.replace("-C", "", regex=False)
        .str.replace("-I", "", regex=False)
    )
    return table

## Load questionnaires 

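1. AUDIT: Alcohol Use Disorders Identification Test;
2. ESPAD: European School Survey Project on Alcohol and Other Drugs (alcohol, tobacco and other drug use);
3. SURPS: Substance Use Risk Profile Scale (personality risk profiles for substance abuse);
4. NEO: NEO Five-Factor Inventory (five-factor personality test);
5. LEQ: Life Events Questionnaire (life history events).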

In [5]:
# Collect the scores of all questionnaires in one table (dfq), joined on
# subject ID and time point.
merge_keys = ["ID", "tp"]

# AUDIT: the whole digest is kept, because the group comparison further down
# uses audit_freq, audit_symp and audit_prob in addition to audit_total.
# (An earlier audit_total-only selection was assigned to an unused name and
# never took effect, so it has been removed.)
dfq = parse_table([
    'BL/psytools/IMAGEN-IMGN_AUDIT_CHILD_RC5-IMAGEN_DIGEST.csv',
    'FU2/psytools/IMAGEN-IMGN_AUDIT_CHILD_FU2-IMAGEN_DIGEST.csv',
])

# Remaining questionnaires: ([BL file, FU2 file], score columns to keep).
questionnaires = {
    "ESPAD": (
        ['BL/psytools/IMAGEN-IMGN_ESPAD_CHILD_RC5-IMAGEN_DIGEST.csv',
         'FU2/psytools/IMAGEN-IMGN_ESPAD_CHILD_FU2-IMAGEN_DIGEST.csv'],
        ["6", "ftnd_sum", "19a", "life_hash", "life_coke", "life_mdma"],
    ),
    "SURPS": (
        ['BL/psytools/IMAGEN-IMGN_SURPS_RC5-IMAGEN_SURVEY_DIGEST.csv',
         'FU2/psytools/IMAGEN-IMGN_SURPS_FU2-IMAGEN_SURVEY_DIGEST.csv'],
        ['h_mean', 'as_mean', "imp_mean", 'ss_mean'],
    ),
    "NEO personality": (
        ['BL/psytools/IMAGEN-IMGN_NEO_FFI_CHILD_RC5-IMAGEN_SURVEY_DIGEST.csv',
         'FU2/psytools/IMAGEN-IMGN_NEO_FFI_FU2-IMAGEN_SURVEY_DIGEST.csv'],
        ['neur_mean', 'extr_mean', 'open_mean', 'agre_mean', 'cons_mean'],
    ),
    "LEQ life experiences": (
        ['BL/psytools/IMAGEN-IMGN_LEQ_RC5-BASIC_DIGEST.csv',
         'FU2/psytools/IMAGEN-IMGN_LEQ_FU2-IMAGEN_DIGEST.csv'],
        ['sexuality_ever_freq', 'devience_ever_freq', 'family_ever_freq', 'distress_ever_freq'],
    ),
}

for files, score_columns in questionnaires.values():
    dfx = parse_table(files)[merge_keys + score_columns]
    # Explicit keys: without `on`, merge joins on every column name the two
    # frames happen to share.
    dfq = pd.merge(dfq, dfx, how="outer", on=merge_keys)

  if (await self.run_code(code, result,  async_=asy)):


In [6]:
# Attach the questionnaire scores to the subject table. The left join keeps
# every row of df; because it joins on ID only, a subject row is repeated for
# each matching questionnaire row.
dftot = pd.merge(df, dfq, how="left", on="ID")

In [7]:
# Keep only the FU2 rows, then split them by label value.
dftot_fu2 = dftot.query("tp == 'FU2'")
df0 = dftot_fu2.query("label == 0")
df1 = dftot_fu2.query("label == 1")

In [8]:
# Score columns compared between the two label groups, in table order.
qs = [
    # AUDIT
    'audit_freq', 'audit_symp', 'audit_prob', 'audit_total',
    # ESPAD
    '6', 'ftnd_sum', '19a', 'life_hash', 'life_coke', 'life_mdma',
    # SURPS
    'h_mean', 'as_mean', 'imp_mean', 'ss_mean',
    # NEO
    'neur_mean', 'extr_mean', 'open_mean', 'agre_mean', 'cons_mean',
    # LEQ
    'sexuality_ever_freq', 'devience_ever_freq', 'family_ever_freq', 'distress_ever_freq',
]

In [9]:
# Create a DF with all results: mean and sd per group and results of indep. t-test

result_columns = ["q", "mean1", "sd1", "mean0", "sd0", "t", "p-val"]
records = []

for q in qs:
    # Drop missing answers in each group before computing any statistic.
    group0 = df0[q].dropna().to_numpy()
    group1 = df1[q].dropna().to_numpy()

    # Argument order (group0, group1): t is negative when group 1 has the
    # larger mean. The arrays are already NaN-free, so no nan_policy is needed.
    stat, p = ttest_ind(group0, group1)

    # NOTE(review): np.std defaults to ddof=0 (population SD), whereas
    # ttest_ind is based on the sample variance -- confirm which SD the
    # table is meant to report.
    records.append({
        "q": q,
        "mean1": np.mean(group1),
        "sd1": np.std(group1),
        "mean0": np.mean(group0),
        "sd0": np.std(group0),
        "t": stat,
        "p-val": p,
    })

# Build the frame once from the collected records. DataFrame.append was
# removed in pandas 2.0, and appending one-row frames gave every row index 0.
dfres = pd.DataFrame(records, columns=result_columns)

In [10]:
# FDR correction for p-values
# fdrcorrection returns (rejected, corrected p-values); only the corrected
# p-values are reported.
_, p_corrected = fdrcorrection(dfres["p-val"], alpha=0.05, is_sorted=False)

# NOTE: this cell replaces dfres and removes the raw "p-val" column, so it can
# only be run once after the cell that builds dfres.
dfres["p-val-corr"] = p_corrected

dfres = dfres.round({"mean1": 2, "sd1": 2, "mean0": 2, "sd0": 2, "t": 2})
# Keyword form: the positional `axis` argument of drop() is rejected by pandas 2.0.
dfres = dfres.drop(columns="p-val")
dfres

Unnamed: 0,q,mean1,sd1,mean0,sd0,t,p-val-corr
0,audit_freq,6.32,2.11,2.08,1.59,-31.33,1.859215e-139
0,audit_symp,1.29,1.3,0.06,0.28,-17.55,1.1039519999999999e-57
0,audit_prob,1.95,2.63,0.14,0.59,-12.77,1.485867e-33
0,audit_total,9.56,4.76,2.29,1.92,-27.14,4.1540820000000004e-114
0,6,4.08,2.26,1.42,2.2,-16.63,6.974861e-53
0,ftnd_sum,0.62,1.44,0.19,0.78,-5.02,9.743539e-07
0,19a,5.6,0.49,0.47,0.5,-144.77,0.0
0,life_hash,3.05,2.36,0.64,1.52,-16.69,4.170165e-53
0,life_coke,0.28,0.86,0.01,0.17,-5.77,2.218021e-08
0,life_mdma,0.62,1.42,0.02,0.19,-7.99,1.250951e-14


In [11]:
# Save to Latex for thesis
# index=False: the row index carries no information (the questionnaire name is
# in column "q"), so it is kept out of the exported table.
dfres.to_latex("subj.tex", index=False)