## Neural Similarity Analysis

In [10]:
import pandas as pd
import scipy.stats as scp
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf

outdir = "/Users/emilyfurtado/Box/HBN_study/stress_analysis/"
#outdir = '/Users/catcamacho/Library/CloudStorage/Box-Box/CCP/HBN_study/stress_analysis/'

#neural activation data
data_dm = pd.read_csv(outdir+"neural_similarity_analysis/activation/full_sample/dynamic_movieDM/subject_level_peak_activation.csv", index_col = 0)
data_dm.index.name = "sub"
data_dm["class"].replace("class01", "conflict", inplace = True)
data_dm["class"].replace("class02", "highstress", inplace = True)
data_dm["class"].replace("class03", "lowstress", inplace = True)
data_tp = pd.read_csv(outdir+"neural_similarity_analysis/activation/full_sample/dynamic_movieTP/subject_level_peak_activation.csv", index_col = 0)
data_tp.index.name = "sub"
data_tp["class"].replace("class01", "conflict", inplace = True)
data_tp["class"].replace("class02", "highstress", inplace = True)
data_tp["class"].replace("class03", "lowstress", inplace = True)

#full data including depression scores
dep_data = pd.read_csv(outdir+"full_data_20220124.csv", index_col = 0)
dep_data = dep_data.loc[:,["MFQ_P_Total", "MFQ_SR_Total", "MDD_mean"]]
dep_data.index.name = "sub"

# merge activation and full data
data_dm = data_dm.merge(dep_data, how = "left", left_index = True, right_index = True).drop_duplicates()
data_tp = data_tp.merge(dep_data, how = "left", left_index = True, right_index = True).drop_duplicates()

#MFQ factor scores 
dep_sr_factor = pd.read_csv(outdir+"MFQsr_factorscores_20220629.csv", index_col = 0)
dep_pr_factor = pd.read_csv(outdir+"MFQpr_factorscores_20220629.csv", index_col = 0)
dep_factor = dep_sr_factor.merge(dep_pr_factor, how = "left", left_index = True, right_index = True)
dep_factor = dep_factor.loc[:,["MFQ_P_NegEmotion", "MFQ_SR_NegEmotion"]]
dep_factor.index = ["sub-{0}".format(sub) for sub in dep_factor.index]
dep_factor.index.name = "subject_id"

# merge activation with mfq factor
data_dm_factor = data_dm.merge(dep_factor, how = "left", left_index = True, right_index = True).drop_duplicates()
data_tp_factor = data_tp.merge(dep_factor, how = "left", left_index = True, right_index = True).drop_duplicates()

# set up results dataframe
results = pd.DataFrame()
ind = 0

##                                             ACTIVATION - STRESS

## Ventral Attention Network

In [16]:
#weighted least squares regression model - movie DM (Discovery)
data_dm["class_string"] = data_dm["class"]
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.highstress]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.highstress]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.lowstress]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.lowstress]']
    ind = ind + 1
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.071
Model:                                WLS   Adj. R-squared:                  0.054
Method:                     Least Squares   F-statistic:                     4.280
Date:                    Thu, 01 Sep 2022   Prob (F-statistic):           0.000911
Time:                            17:37:19   Log-Likelihood:                -941.58
No. Observations:                     286   AIC:                             1895.
Df Residuals:                         280   BIC:                             1917.
Df Model:                               5                                         
Covariance Type:                nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------

In [18]:
#weighted least squares regression model - movie DM (Replication)
data_dm["class_string"] = data_dm["class"]
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'replication'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.highstress]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.highstress]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.lowstress]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.lowstress]']
    ind = ind + 1
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.054
Model:                                WLS   Adj. R-squared:                  0.031
Method:                     Least Squares   F-statistic:                     2.349
Date:                    Thu, 01 Sep 2022   Prob (F-statistic):             0.0423
Time:                            17:38:10   Log-Likelihood:                -694.16
No. Observations:                     210   AIC:                             1400.
Df Residuals:                         204   BIC:                             1420.
Df Model:                               5                                         
Covariance Type:                nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------

In [28]:
#weighted least squares regression model - movie TP (Discovery)
data_tp["class_string"] = data_tp["class"]
for x in range(1,6):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_tp.loc[data_tp['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'TP'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.056
Model:                                WLS   Adj. R-squared:                  0.041
Method:                     Least Squares   F-statistic:                     3.820
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):            0.00224
Time:                            20:40:50   Log-Likelihood:                -1096.5
No. Observations:                     329   AIC:                             2205.
Df Residuals:                         323   BIC:                             2228.
Df Model:                               5                                         
Covariance Type:                nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------

In [29]:
#weighted least squares regression model - movie TP (Replication)
data_tp["class_string"] = data_tp["class"]
for x in range(1,6):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_tp.loc[data_tp['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'replication'
    results.loc[ind, 'movie'] = 'TP'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.051
Model:                                WLS   Adj. R-squared:                  0.031
Method:                     Least Squares   F-statistic:                     2.577
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):             0.0271
Time:                            20:40:55   Log-Likelihood:                -824.84
No. Observations:                     244   AIC:                             1662.
Df Residuals:                         238   BIC:                             1683.
Df Model:                               5                                         
Covariance Type:                nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------

## Default Mode Network

In [30]:
#weighted least squares regression model - movie DM (Discovery)
data_dm["class_string"] = data_dm["class"]
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.095
Model:                            WLS   Adj. R-squared:                  0.079
Method:                 Least Squares   F-statistic:                     5.906
Date:                Tue, 30 Aug 2022   Prob (F-statistic):           3.29e-05
Time:                        20:40:58   Log-Likelihood:                -809.05
No. Observations:                 286   AIC:                             1630.
Df Residuals:                     280   BIC:                             1652.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Intercept                 

In [31]:
#weighted least squares regression model - movie DM (Replication)
data_dm["class_string"] = data_dm["class"]
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'replication'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.021
Model:                            WLS   Adj. R-squared:                 -0.003
Method:                 Least Squares   F-statistic:                    0.8623
Date:                Tue, 30 Aug 2022   Prob (F-statistic):              0.507
Time:                        20:41:00   Log-Likelihood:                -630.87
No. Observations:                 210   AIC:                             1274.
Df Residuals:                     204   BIC:                             1294.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Intercept                 

In [32]:
#weighted least squares regression model - movie TP (Discovery)
data_tp["class_string"] = data_tp["class"]
for x in range(1,6):
    if 'Default_peak{0}_mean'.format(x) in data_tp.columns:
        res = smf.wls("Default_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                      data = data_tp.loc[data_tp['site'] == "rubic",:]).fit()
        results.loc[ind, 'dataset'] = 'discovery'
        results.loc[ind, 'movie'] = 'TP'
        results.loc[ind, 'networkDV'] = 'Default'
        results.loc[ind, 'sceneNo'] = x
        results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
        results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
        results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
        results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
        ind = ind + 1
    else:
        break
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.019
Model:                            WLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.267
Date:                Tue, 30 Aug 2022   Prob (F-statistic):              0.278
Time:                        20:41:01   Log-Likelihood:                -912.42
No. Observations:                 329   AIC:                             1837.
Df Residuals:                     323   BIC:                             1860.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Intercept                 

In [33]:
#weighted least squares regression model - movie TP (Replication)
data_tp["class_string"] = data_tp["class"]
for x in range(1,6):
    if 'Default_peak{0}_mean'.format(x) in data_tp.columns:
        res = smf.wls("Default_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                      data = data_tp.loc[data_tp['site'] == "cbic",:]).fit()
        results.loc[ind, 'dataset'] = 'replication'
        results.loc[ind, 'movie'] = 'TP'
        results.loc[ind, 'networkDV'] = 'Default'
        results.loc[ind, 'sceneNo'] = x
        results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
        results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
        results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
        results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
        ind = ind + 1
    else:
        break
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.089
Model:                            WLS   Adj. R-squared:                  0.070
Method:                 Least Squares   F-statistic:                     4.648
Date:                Tue, 30 Aug 2022   Prob (F-statistic):           0.000456
Time:                        20:41:02   Log-Likelihood:                -688.56
No. Observations:                 244   AIC:                             1389.
Df Residuals:                     238   BIC:                             1410.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Intercept                 

## Cingulo-Opercular Network

In [34]:
#weighted least squares regression model - movie DM (Discovery)
data_dm["class_string"] = data_dm["class"]
for x in range(1,8):
    res = smf.wls("CinguloOperc_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'CinguloOperc'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

                               WLS Regression Results                              
Dep. Variable:     CinguloOperc_peak1_mean   R-squared:                       0.056
Model:                                 WLS   Adj. R-squared:                  0.039
Method:                      Least Squares   F-statistic:                     3.329
Date:                     Tue, 30 Aug 2022   Prob (F-statistic):            0.00616
Time:                             20:41:05   Log-Likelihood:                -852.98
No. Observations:                      286   AIC:                             1718.
Df Residuals:                          280   BIC:                             1740.
Df Model:                                5                                         
Covariance Type:                 nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------

In [35]:
#weighted least squares regression model - movie DM (Replication)
data_dm["class_string"] = data_dm["class"]
for x in range(1,8):
    res = smf.wls("CinguloOperc_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'replication'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'CinguloOperc'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

                               WLS Regression Results                              
Dep. Variable:     CinguloOperc_peak1_mean   R-squared:                       0.011
Model:                                 WLS   Adj. R-squared:                 -0.014
Method:                      Least Squares   F-statistic:                    0.4351
Date:                     Tue, 30 Aug 2022   Prob (F-statistic):              0.824
Time:                             20:41:06   Log-Likelihood:                -641.21
No. Observations:                      210   AIC:                             1294.
Df Residuals:                          204   BIC:                             1315.
Df Model:                                5                                         
Covariance Type:                 nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------

In [36]:
#weighted least squares regression model - movie TP (Discovery)
data_tp["class_string"] = data_tp["class"]
for x in range(1,7):
    if 'CinguloOperc_peak{0}_mean'.format(x) in data_tp.columns:
        res = smf.wls("CinguloOperc_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                      data = data_tp.loc[data_tp['site'] == "rubic",:]).fit()
        results.loc[ind, 'dataset'] = 'discovery'
        results.loc[ind, 'movie'] = 'TP'
        results.loc[ind, 'networkDV'] = 'CinguloOperc'
        results.loc[ind, 'sceneNo'] = x
        results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
        results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
        results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
        results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
        ind = ind + 1
    else:
        break
    print(res.summary())

                               WLS Regression Results                              
Dep. Variable:     CinguloOperc_peak1_mean   R-squared:                       0.043
Model:                                 WLS   Adj. R-squared:                  0.028
Method:                      Least Squares   F-statistic:                     2.921
Date:                     Tue, 30 Aug 2022   Prob (F-statistic):             0.0135
Time:                             20:41:06   Log-Likelihood:                -1055.7
No. Observations:                      329   AIC:                             2123.
Df Residuals:                          323   BIC:                             2146.
Df Model:                                5                                         
Covariance Type:                 nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------

In [37]:
#weighted least squares regression model - movie TP (Replication)
data_tp["class_string"] = data_tp["class"]
for x in range(1,7):
    if 'CinguloOperc_peak{0}_mean'.format(x) in data_tp.columns:
        res = smf.wls("CinguloOperc_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                      data = data_tp.loc[data_tp['site'] == "cbic",:]).fit()
        results.loc[ind, 'dataset'] = 'replication'
        results.loc[ind, 'movie'] = 'TP'
        results.loc[ind, 'networkDV'] = 'CinguloOperc'
        results.loc[ind, 'sceneNo'] = x
        results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
        results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
        results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
        results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
        ind = ind + 1
    else:
        break
    print(res.summary())

                               WLS Regression Results                              
Dep. Variable:     CinguloOperc_peak1_mean   R-squared:                       0.018
Model:                                 WLS   Adj. R-squared:                 -0.003
Method:                      Least Squares   F-statistic:                    0.8737
Date:                     Tue, 30 Aug 2022   Prob (F-statistic):              0.499
Time:                             20:41:07   Log-Likelihood:                -799.73
No. Observations:                      244   AIC:                             1611.
Df Residuals:                          238   BIC:                             1632.
Df Model:                                5                                         
Covariance Type:                 nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------

## Amygdala

In [38]:
#weighted least squares regression model - movie DM (Discovery)
data_dm["class_string"] = data_dm["class"]
for x in range(1,8):
    res = smf.wls("Amygdala_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Amygdala'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

PatsyError: Error evaluating factor: NameError: name 'Amygdala_peak1_mean' is not defined
    Amygdala_peak1_mean ~ age + class_string + female + meanFD
    ^^^^^^^^^^^^^^^^^^^

In [39]:
#weighted least squares regression model - movie DM (Replication)
data_dm["class_string"] = data_dm["class"]
for x in range(1,8):
    res = smf.wls("Amygdala_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'replication'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Amygdala'
    results.loc[ind, 'sceneNo'] = x
    results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
    results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
    results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
    results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
    ind = ind + 1
    print(res.summary())

PatsyError: Error evaluating factor: NameError: name 'Amygdala_peak1_mean' is not defined
    Amygdala_peak1_mean ~ age + class_string + female + meanFD
    ^^^^^^^^^^^^^^^^^^^

In [40]:
#weighted least squares regression model - movie TP (Discovery)
data_tp["class_string"] = data_tp["class"]
for x in range(1,7):
    if 'Amygdala{0}_mean'.format(x) in data_tp.columns:
        res = smf.wls("Amygdala_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                      data = data_tp.loc[data_tp['site'] == "rubic",:]).fit()
        results.loc[ind, 'dataset'] = 'discovery'
        results.loc[ind, 'movie'] = 'TP'
        results.loc[ind, 'networkDV'] = 'Amygdala'
        results.loc[ind, 'sceneNo'] = x
        results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
        results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
        results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
        results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
        ind = ind + 1
    else:
        break
    print(res.summary())

In [41]:
#weighted least squares regression model - movie TP (Replication)
data_tp["class_string"] = data_tp["class"]
for x in range(1,7):
    if 'Amygdala_peak{0}_mean'.format(x) in data_tp.columns:
        res = smf.wls("Amygdala_peak{0}_mean ~ age + class_string + female + meanFD".format(x), 
                      data = data_tp.loc[data_tp['site'] == "cbic",:]).fit()
        results.loc[ind, 'dataset'] = 'replication'
        results.loc[ind, 'movie'] = 'TP'
        results.loc[ind, 'networkDV'] = 'Amygdala'
        results.loc[ind, 'sceneNo'] = x
        results.loc[ind, 'group2_tstat'] = res.tvalues['class_string[T.class02]']
        results.loc[ind, 'group2_pval'] = res.pvalues['class_string[T.class02]']
        results.loc[ind, 'group3_tstat'] = res.tvalues['class_string[T.class03]']
        results.loc[ind, 'group3_pval'] = res.pvalues['class_string[T.class03]']
        ind = ind + 1
    else:
        break
    print(res.summary())

In [42]:
results.to_csv('class_activation_results.csv')

## ACTIVATION - DEPRESSION

## Ventral Attn

In [49]:
#Ventral Attn
#weighted least squares regression model - movie DM (Discovery)- Depression 
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + MFQ_P_Total + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.056
Model:                                WLS   Adj. R-squared:                  0.042
Method:                     Least Squares   F-statistic:                     3.973
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):            0.00378
Time:                            20:43:16   Log-Likelihood:                -904.22
No. Observations:                     274   AIC:                             1818.
Df Residuals:                         269   BIC:                             1837.
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept 

In [44]:
#Ventral Attn
#weighted least squares regression model - movie DM (Replication) - Depression 
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + MFQ_P_Total + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'replication'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.042
Model:                                WLS   Adj. R-squared:                  0.024
Method:                     Least Squares   F-statistic:                     2.274
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):             0.0626
Time:                            20:41:47   Log-Likelihood:                -695.48
No. Observations:                     210   AIC:                             1401.
Df Residuals:                         205   BIC:                             1418.
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept 

## Default Mode

In [45]:
#Default Mode
#weighted least squares regression model - movie DM (Discovery) - Depression
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + MFQ_P_Total + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.093
Model:                            WLS   Adj. R-squared:                  0.080
Method:                 Least Squares   F-statistic:                     6.932
Date:                Tue, 30 Aug 2022   Prob (F-statistic):           2.52e-05
Time:                        20:41:48   Log-Likelihood:                -768.61
No. Observations:                 274   AIC:                             1547.
Df Residuals:                     269   BIC:                             1565.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       6.1879      1.222      5.066      

In [46]:
#Default Mode
#weighted least squares regression model - movie DM (Replication) - Depression
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + MFQ_P_Total + female + meanFD".format(x), 
                  data = data_dm.loc[data_dm['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.023
Model:                            WLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.217
Date:                Tue, 30 Aug 2022   Prob (F-statistic):              0.305
Time:                        20:41:50   Log-Likelihood:                -630.60
No. Observations:                 210   AIC:                             1271.
Df Residuals:                     205   BIC:                             1288.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       7.0067      1.591      4.403      

In [47]:
results.to_csv('dep_activation_results.csv')

## ACTIVATION - Parent Reported DEPRESSION (FACTOR) 

In [58]:
#Ventral Attn
#weighted least squares regression model - movie DM (Discovery)- Depression 
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + MFQ_P_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.035
Model:                                WLS   Adj. R-squared:                  0.016
Method:                     Least Squares   F-statistic:                     1.872
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):              0.116
Time:                            21:09:20   Log-Likelihood:                -714.37
No. Observations:                     214   AIC:                             1439.
Df Residuals:                         209   BIC:                             1456.
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------


In [59]:
#Ventral Attn
#weighted least squares regression model - movie DM (Replication)- Depression 
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + MFQ_P_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.028
Model:                                WLS   Adj. R-squared:                  0.004
Method:                     Least Squares   F-statistic:                     1.168
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):              0.327
Time:                            21:09:21   Log-Likelihood:                -557.94
No. Observations:                     170   AIC:                             1126.
Df Residuals:                         165   BIC:                             1142.
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------


In [60]:
#Default Mode
#weighted least squares regression model - movie DM (Discovery) - Depression
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + MFQ_SR_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.086
Model:                            WLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     4.962
Date:                Tue, 30 Aug 2022   Prob (F-statistic):           0.000767
Time:                        21:09:21   Log-Likelihood:                -599.05
No. Observations:                 215   AIC:                             1208.
Df Residuals:                     210   BIC:                             1225.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             7.4663      1.74

In [68]:
#Default Mode
#weighted least squares regression model - movie DM (Discovery) - Depression
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + MFQ_SR_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.028
Model:                            WLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.179
Date:                Tue, 30 Aug 2022   Prob (F-statistic):              0.322
Time:                        21:15:01   Log-Likelihood:                -514.92
No. Observations:                 170   AIC:                             1040.
Df Residuals:                     165   BIC:                             1056.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             8.0986      2.19

## ACTIVATION - Self Reported DEPRESSION (FACTOR) 

In [69]:
#Ventral Attn
#weighted least squares regression model - movie DM (Discovery)- Depression 
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + MFQ_SR_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.035
Model:                                WLS   Adj. R-squared:                  0.016
Method:                     Least Squares   F-statistic:                     1.886
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):              0.114
Time:                            21:15:06   Log-Likelihood:                -717.29
No. Observations:                     215   AIC:                             1445.
Df Residuals:                         210   BIC:                             1461.
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------

In [70]:
#Ventral Attn
#weighted least squares regression model - movie DM (Discovery)- Depression 
for x in range(1,13):
    res = smf.wls("VentralAttn_peak{0}_mean ~ age + MFQ_SR_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'VentralAttn'
    print(res.summary())

                              WLS Regression Results                              
Dep. Variable:     VentralAttn_peak1_mean   R-squared:                       0.023
Model:                                WLS   Adj. R-squared:                 -0.001
Method:                     Least Squares   F-statistic:                    0.9673
Date:                    Tue, 30 Aug 2022   Prob (F-statistic):              0.427
Time:                            21:15:07   Log-Likelihood:                -558.35
No. Observations:                     170   AIC:                             1127.
Df Residuals:                         165   BIC:                             1142.
Df Model:                               4                                         
Covariance Type:                nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------

In [71]:
#Default Mode
#weighted least squares regression model - movie DM (Discovery) - Depression
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + MFQ_SR_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "rubic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.086
Model:                            WLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     4.962
Date:                Tue, 30 Aug 2022   Prob (F-statistic):           0.000767
Time:                        21:15:08   Log-Likelihood:                -599.05
No. Observations:                 215   AIC:                             1208.
Df Residuals:                     210   BIC:                             1225.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             7.4663      1.74

In [72]:
#Default Mode
#weighted least squares regression model - movie DM (Discovery) - Depression
for x in range(1,10):
    res = smf.wls("Default_peak{0}_mean ~ age + MFQ_SR_NegEmotion + female + meanFD".format(x), 
                  data = data_dm_factor.loc[data_dm_factor['site'] == "cbic",:]).fit()
    results.loc[ind, 'dataset'] = 'discovery'
    results.loc[ind, 'movie'] = 'DM'
    results.loc[ind, 'networkDV'] = 'Default'
    print(res.summary())

                            WLS Regression Results                            
Dep. Variable:     Default_peak1_mean   R-squared:                       0.028
Model:                            WLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.179
Date:                Tue, 30 Aug 2022   Prob (F-statistic):              0.322
Time:                        21:15:08   Log-Likelihood:                -514.92
No. Observations:                 170   AIC:                             1040.
Df Residuals:                     165   BIC:                             1056.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             8.0986      2.19

## Mediation Analysis

In [73]:
import pingouin as pg

# test if membership to each group predicts depression via activation to peak9 scene
# this is unlikely since we don't see an association between activation and depression scores.
data_dm.loc[:,['class1','class2','class3']] = 0
data_dm.loc[data_dm['class_string']=='class01','class1'] = 1
data_dm.loc[data_dm['class_string']=='class02','class2'] = 1
data_dm.loc[data_dm['class_string']=='class03','class3'] = 1

pg.mediation_analysis(data=data_dm, x='class1', y='MFQ_SR_Total', m='VentralAttn_peak9_mean', 
                      covar=['age', 'female', 'meanFD'], seed=42)

KeyError: 'class_string'

In [None]:
pg.mediation_analysis(data=data_dm, x='class2', y='MFQ_SR_Total', m='VentralAttn_peak9_mean', 
                      covar=['age', 'female', 'meanFD'], seed=42)

  return warn(


In [11]:
pg.mediation_analysis(data=data_dm, x='class3', y='MFQ_SR_Total', m='VentralAttn_peak9_mean', 
                      covar=['age', 'female', 'meanFD'], seed=42)

Unnamed: 0,path,coef,se,pval,CI[2.5%],CI[97.5%],sig
0,VentralAttn_peak9_mean ~ X,1.432669,0.713479,0.045346,0.029831,2.835506,Yes
1,Y ~ VentralAttn_peak9_mean,0.007599,0.00725,0.295239,-0.006656,0.021854,No
2,Total,0.062176,0.101727,0.54143,-0.137839,0.26219,No
3,Direct,0.05183,0.102265,0.612572,-0.149244,0.252905,No
4,Indirect,0.010345,0.012223,0.384,-0.008246,0.044318,No
