# Results summary

### Imports

In [50]:
import os
import re
import glob
import os
import ast
import os.path as op
from collections import defaultdict
from copy import deepcopy
import copy

import pickle
from time import time
import pywt
import mne
import scipy
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import cesium.featurize
from plotly.subplots import make_subplots
from ipywidgets import Dropdown, FloatRangeSlider, IntSlider, FloatSlider, interact
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.base import TransformerMixin, BaseEstimator

import sys

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.svm import SVR
from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA

from rumination_experiment_transformers_averaged_CDS import *

import warnings

warnings.filterwarnings("ignore")


In [51]:
# Show complete DataFrames in cell output: lift pandas' row and column display limits.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

---
## Loading data

Load pickled results

In [100]:
# Columns shown when a results table is displayed in the cells below.
columns = [
    'mean_train_r2',
    'mean_cv_r2',
    'p-value',
    'external_score',
    'external_p-value',
    'scale',
    'model',
]

In [101]:
# Full questionnaire-scale names, as they appear in the 'scale' column of the results.
rumination = "16-Rumination Full Scale"
dass_anxiety = "05-DASS-21 Anxiety scale"
stai_t = "04-STAI Trait SUM"
self_esteem = "06-Self-Esteem Scale_SES Rosenberga"

# BIS / BAS
bis = "07-BIS"
bas_dzialanie = "07-BAS Dzialanie"
bas_przyjemnosc = "07-BAS Poszukiwanie przyjemnosci"
bas_nagroda = "07-BAS Wrazliwosc na nagrode"

# Obsessive-Compulsive subscales and full scale
washing = "14-Obsessive-Compulsive WASHING"
obsessing = "14-Obsessive-Compulsive OBSESSING"
hoarding = "14-Obsessive-Compulsive HOARDING"
ordering = "14-Obsessive-Compulsive ORDERING"
checking = "14-Obsessive-Compulsive CHECKING"
neutralizing = "14-Obsessive-Compulsive NEUTRALIZING"
oci_r_full = "14-Obsessive-Compulsive FULL"

# Obsessional beliefs, thought suppression, and related scales
threat = "15-Obsessional Beliefs - Overestimation of threat"
perfectionism_IU = "15-Obsessional Beliefs - Perfectionism/ Intolerance of uncertainty"
thought_suppression = "18-Thought Suppression Inventory"
nonforgivness = "22-Nonforgiveness - Full Scale"
indecisivness = "27-Indecisiveness Scale_Frost"

# Intolerance of Uncertainty subscales
IU_prospecitve = "28-Intolerance of Uncertainty - Prospective Anxiety"
IU_inhibitory = "28-Intolerance of Uncertainty - Inhibitory Anxiety"

# Scales of interest: results for any other scale are filtered out below.
scales = [
    self_esteem, rumination, dass_anxiety, stai_t, bis,
    washing, obsessing, hoarding, ordering, checking, neutralizing, oci_r_full,
    threat, thought_suppression,
    IU_prospecitve, IU_inhibitory,
]

# Full scale name -> short label used in the saved summaries.
scales_dict = {
    rumination: "RRQ",
    dass_anxiety: "DASS-21 Anx",
    stai_t: "STAI-T",
    bis: "BIS",
    washing: "Washing",
    obsessing: "Obsessing",
    hoarding: "Hoarding",
    ordering: "Ordering",
    checking: "Checking",
    neutralizing: "Neutralizing",
    oci_r_full: "OCI-R",
    threat: "OT",
    thought_suppression: "WBSI",
    IU_prospecitve: "IUS-P",
    IU_inhibitory: "IUS-I",
    self_esteem: "SES",
}

In [102]:
# Load the pickled regression results for the ERN and Pe amplitude models
# (both objects are used as DataFrames in the cells below).
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
results_ern = pd.read_pickle("../public_data/results/models_pickles/regression_union_100-600_cached_ern_amplitude_various_scales_with_external_p.pkl")
results_pe = pd.read_pickle("../public_data/results/models_pickles/regression_union_100-600_cached_pe_amplitude_various_scales_with_external_p.pkl")

In [103]:
# Load the separately stored results of the Intolerance-of-Uncertainty (IU) runs
# for ERN and Pe; they are merged into the main results further down.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
results_ern_IU = pd.read_pickle("../public_data/results/models_pickles/regression_union_100-600_cached_ern_amplitude_various_scales_IU_with_external_p.pkl")
results_pe_IU = pd.read_pickle("../public_data/results/models_pickles/regression_union_100-600_cached_pe_amplitude_various_scales_IU_with_external_p.pkl")

## ERN summary

In [105]:
# Extract the results of the IU-Inhibitory experiment: keep the rows for that
# scale, then only the last 12 of them.
results_ern_IU = results_ern_IU.loc[
    results_ern_IU['scale'] == '28-Intolerance of Uncertainty - Inhibitory Anxiety'
].tail(12)
results_ern_IU[columns]

Unnamed: 0,mean_train_r2,mean_cv_r2,p-value,external_score,external_p-value,scale,model
42,0.015048,-0.003498,0.028971,-0.130953,0.466533,28-Intolerance of Uncertainty - Inhibitory Anx...,svr
43,0.067293,-0.026402,0.290709,-0.095979,0.959041,28-Intolerance of Uncertainty - Inhibitory Anx...,kr
44,0.070784,-0.029344,0.312687,-0.101671,0.478521,28-Intolerance of Uncertainty - Inhibitory Anx...,en
45,0.127674,-0.019884,0.155844,0.005241,0.108891,28-Intolerance of Uncertainty - Inhibitory Anx...,svr
46,0.175661,0.029852,0.021978,0.047695,0.084915,28-Intolerance of Uncertainty - Inhibitory Anx...,kr
47,0.0,-0.034134,0.447552,-0.010137,0.975025,28-Intolerance of Uncertainty - Inhibitory Anx...,en
48,0.155707,0.007049,0.052947,0.025843,0.092907,28-Intolerance of Uncertainty - Inhibitory Anx...,svr
49,0.293927,0.137673,0.000999,-0.038856,0.156843,28-Intolerance of Uncertainty - Inhibitory Anx...,kr
50,0.124229,0.036322,0.013986,0.056105,0.06993,28-Intolerance of Uncertainty - Inhibitory Anx...,en
51,0.567835,-0.007381,0.044955,-0.170057,0.204795,28-Intolerance of Uncertainty - Inhibitory Anx...,svr


In [107]:
# change indexes of IU results to update main results DF
results_ern_IU = results_ern_IU.set_index(pd.Index(np.arange(168, 180)))
results_ern.update(results_ern_IU)

In [111]:
# filter scales of interest
results_ern = results_ern[results_ern['scale'].isin(scales)]

#### Extract best significant results

In [113]:
# Select the ERN results as the table the selection cells below operate on
# (plain assignment: same object, not a copy).
current_results = results_ern

In [114]:
# Keep models with positive train R^2, non-negative CV and external scores,
# and a p-value below 0.05.
results_significant = current_results.loc[
    current_results['mean_train_r2'].gt(0)
    & current_results['mean_cv_r2'].ge(0)
    & current_results['external_score'].ge(0)
    & current_results['p-value'].lt(0.05)
]

Extract best significant model for each estimator

In [116]:
# For every (scale, model) pair keep the significant row(s) whose mean CV R^2 equals
# the maximum of that group (ties are all kept).
# The aggregation is named by the string 'max': passing the builtin `max` is deprecated
# in recent pandas, where it is slated to stop being mapped to the groupby max.
results_idx = (
    results_significant.groupby(['scale', 'model'])['mean_cv_r2'].transform('max')
    == results_significant['mean_cv_r2']
)
results_significant_unique = results_significant[results_idx]
results_significant_unique[columns]

Unnamed: 0,mean_train_r2,mean_cv_r2,p-value,external_score,external_p-value,scale,model
1,0.125565,0.06896,0.003996,0.07052,0.017982,16-Rumination Full Scale,kr
6,0.061035,0.045102,0.003996,0.014335,0.215784,16-Rumination Full Scale,svr
11,0.104305,0.054987,0.008991,0.023258,0.105894,16-Rumination Full Scale,en
47,0.125939,0.090101,0.001998,0.012885,0.124875,07-BIS,en
60,0.049587,0.005228,0.034965,0.006655,0.28971,14-Obsessive-Compulsive OBSESSING,svr
61,0.092647,0.030011,0.015984,0.006548,0.214785,14-Obsessive-Compulsive OBSESSING,kr
142,0.196231,0.055361,0.006993,0.059436,0.057942,15-Obsessional Beliefs - Overestimation of threat,kr
172,0.175661,0.029852,0.021978,0.047695,0.084915,28-Intolerance of Uncertainty - Inhibitory Anx...,kr
176,0.124229,0.036322,0.013986,0.056105,0.06993,28-Intolerance of Uncertainty - Inhibitory Anx...,en
190,0.208191,0.071807,0.006993,0.019947,0.093906,06-Self-Esteem Scale_SES Rosenberga,kr


#### Extract the best (not necessarily significant) models for each estimator - based on the highest mean CV score

In [117]:
# For every (scale, model) pair keep the row(s) whose mean CV R^2 equals the maximum
# of that group, regardless of significance (ties are all kept).
# The aggregation is named by the string 'max': passing the builtin `max` is deprecated
# in recent pandas, where it is slated to stop being mapped to the groupby max.
results_idx = (
    current_results.groupby(['scale', 'model'])['mean_cv_r2'].transform('max')
    == current_results['mean_cv_r2']
)
results_unique_models = current_results[results_idx]

In [118]:
# Rows tied on the best score within a (scale, model) group are collapsed to one:
# drop_duplicates keeps the first row for each (mean_cv_r2, model, scale) combination.
results_unique_models = results_unique_models.drop_duplicates(subset=['mean_cv_r2', 'model', 'scale'])
# Display the summary columns.
results_unique_models[columns]

Unnamed: 0,mean_train_r2,mean_cv_r2,p-value,external_score,external_p-value,scale,model
0,0.293081,0.049043,0.005994,-0.147834,0.378621,16-Rumination Full Scale,svr
1,0.125565,0.06896,0.003996,0.07052,0.017982,16-Rumination Full Scale,kr
11,0.104305,0.054987,0.008991,0.023258,0.105894,16-Rumination Full Scale,en
13,0.03789,-0.01475,0.125874,-0.077957,0.549451,05-DASS-21 Anxiety scale,kr
14,0.033512,-0.021767,0.211788,-0.06747,0.71029,05-DASS-21 Anxiety scale,en
18,0.108733,0.012977,0.023976,-0.147534,0.274725,05-DASS-21 Anxiety scale,svr
24,0.347438,0.123885,0.000999,-0.255872,0.818182,04-STAI Trait SUM,svr
34,0.212917,0.142738,0.000999,-0.087499,0.37962,04-STAI Trait SUM,kr
35,0.19811,0.132219,0.000999,-0.042874,0.298701,04-STAI Trait SUM,en
45,0.095453,0.059871,0.002997,-0.040503,0.243756,07-BIS,svr


#### Update the main results DF with the significant models (these are not necessarily the models with the highest mean CV score)

In [119]:
# ERN
indexes = pd.Index([1,0,11,47,66,67,142, 175,176,190,191])

results_significant_unique_updated_with_best_significant = results_significant_unique.set_index(indexes)
results_unique_models.update(results_significant_unique_updated_with_best_significant)

#### Replace names of scales

In [121]:
# Map the full questionnaire names to their short labels. `replace` with a dict
# substitutes matching values wherever they occur in the frame, not only in 'scale'.
results_unique_models = results_unique_models.replace(scales_dict)

#### Leave only KR and EN models

In [122]:
# Model identifiers to keep ('en' and 'kr'); every other model row is dropped.
models = ['en', 'kr']
results_unique_models_without_svr = results_unique_models.loc[
    results_unique_models['model'].isin(models)
]

#### Save results

- all

In [179]:
# Persist the complete ERN table (all columns) as a pickle.
results_unique_models_without_svr.to_pickle("../public_data/results/models_pickles/ern_results_models_full.pkl")

- extract only most important info

In [178]:
# Essential columns for the CSV summary. This rebinds `columns`, so tables displayed
# after this cell has run show scale/model first.
columns = [
    'scale',
    'model',
    'mean_train_r2',
    'mean_cv_r2',
    'p-value',
    'external_score',
    'external_p-value',
]

results_unique_models_without_svr.loc[:, columns].to_csv("../public_data/results/models_results/ern_results_essential.csv")

- extended summary of results

In [168]:
# Columns kept for the extended summary. Several of them (pipeline_name,
# pre_processed_pipeline, cv_results, parameters, best_estimator) are only needed to
# derive extra columns in the following cells.
columns_extended = [
    'scale', 'model', 'mean_train_r2', 'mean_train_mse', 'mean_train_mae',
    'mean_cv_r2', 'std_cv_r2', 'p-value', 'external_score', 'external_p-value',
    'pipeline_name', 'pre_processed_pipeline',
    'mean_cv_mae', 'std_cv_mae', 'mean_cv_mse', 'std_cv_mse',
    'parameters', 'cv_results', 'best_estimator',
]

# Take an explicit copy: the next cells add columns to `results_extended`, and assigning
# into a bare selection of another frame is the pattern SettingWithCopyWarning exists for.
results_extended = results_unique_models_without_svr[columns_extended].copy()

Add num of PCA components

In [169]:
# The value is the second '_'-separated token of each pipeline name.
results_extended['PCA components num'] = [
    name.split('_')[1] for name in results_extended['pipeline_name']
]

Add ROI

In [170]:
# add column with ROI indicator
results_extended['ROI'] = results_extended['pre_processed_pipeline'].apply(lambda x: 
                                                '1' if x[0].channels_list == ['Fpz', 'AFz', 'Fz', 'FCz', 'Cz', 'CPz', 'P1', 'Pz', 'P2'] else '2')

Add CV splits scores

In [171]:
# add columns with exact scores of CV
for index, row in results_extended.iterrows():
    
    best = row.parameters
    index_best = row.cv_results['params'].index(best)
    
    split_1_r2 = row.cv_results['split0_test_r2'][index_best]
    split_2_r2 = row.cv_results['split1_test_r2'][index_best]
    split_3_r2 = row.cv_results['split2_test_r2'][index_best]
    
    results_extended.loc[index, 'split_1_r2'] = split_1_r2
    results_extended.loc[index, 'split_2_r2'] = split_2_r2
    results_extended.loc[index, 'split_3_r2'] = split_3_r2

Add linear models coefficients

In [172]:
# Build a table with (up to) the first four coefficients of each elastic-net model;
# rows for any other model type hold no coefficients.
# Rows are gathered in a list and the frame is built once: `DataFrame.append` was removed
# in pandas 2.0, and calling it per row copied the whole frame on every iteration.
coef_rows = []

for index, row in results_extended[['best_estimator']].iterrows():
    # The second pipeline step is inspected; its step name identifies the model type.
    if row.best_estimator.steps[1][0] == 'en':
        estimator = row.best_estimator.steps[1][1]
        # Truncate to four coefficients, then pad with None when there are fewer.
        coefs = list(estimator.coef_[:4])
        coefs += [None] * (4 - len(coefs))
    else:
        coefs = [None] * 4
    coef_rows.append(coefs)

coef_df = pd.DataFrame(coef_rows, columns=['coef_1', 'coef_2', 'coef_3', 'coef_4'])

In [173]:
# Attach the coefficient columns side by side. Both frames get a fresh 0..n-1 index first,
# so rows are paired by position (coef_df was built in the row order of results_extended).
results_extended = pd.concat([results_extended.reset_index(drop=True), coef_df.reset_index(drop=True)], axis=1)

In [175]:
# Remove the helper columns that were only needed to derive the columns added above.
results_extended = results_extended.drop(
    columns=['pipeline_name', 'pre_processed_pipeline', 'cv_results']
)

In [177]:
# Write the extended ERN summary to CSV.
results_extended.to_csv("../public_data/results/models_results/ern_results_extended.csv")

## Pe summary

In [180]:
# Extract the results of the IU-Prospective experiment (rows for that scale only).
results_pe_IU = results_pe_IU.loc[
    results_pe_IU['scale'] == '28-Intolerance of Uncertainty - Prospective Anxiety'
]

Unnamed: 0,scale,model,mean_train_r2,mean_cv_r2,p-value,external_score,external_p-value
27,28-Intolerance of Uncertainty - Prospective An...,svr,0.056174,0.026537,0.028971,0.126669,0.00999
28,28-Intolerance of Uncertainty - Prospective An...,kr,0.073625,0.029535,0.033966,0.091769,0.020979
29,28-Intolerance of Uncertainty - Prospective An...,en,0.071777,0.030346,0.031968,0.082223,0.01998
30,28-Intolerance of Uncertainty - Prospective An...,svr,0.059234,0.016911,0.03996,0.134998,0.013986
31,28-Intolerance of Uncertainty - Prospective An...,kr,0.060687,0.013915,0.057942,0.064573,0.024975
32,28-Intolerance of Uncertainty - Prospective An...,en,0.058377,0.016608,0.053946,0.046847,0.023976
33,28-Intolerance of Uncertainty - Prospective An...,svr,0.069365,0.024546,0.024975,0.092814,0.034965
34,28-Intolerance of Uncertainty - Prospective An...,kr,0.074454,0.023279,0.040959,0.042899,0.051948
35,28-Intolerance of Uncertainty - Prospective An...,en,0.097828,0.025752,0.025974,0.054578,0.056943
36,28-Intolerance of Uncertainty - Prospective An...,svr,0.049634,0.025317,0.027972,0.116312,0.008991


In [181]:
# Append the IU-Prospective results to the main Pe results.
# `ignore_index=True` gives the combined frame one fresh 0..n-1 index. Resetting each part
# separately before concatenating leaves duplicate labels (both parts start at 0), and a
# label-based write such as `df.loc[label, col] = value` then hits every row sharing it.
results_full_pe = pd.concat([results_pe, results_pe_IU], axis=0, ignore_index=True)

In [184]:
# filter scales of interest
results_full_pe = results_full_pe[results_full_pe['scale'].isin(scales)]

#### Extract best models for each estimator - based on the highest mean CV score

In [190]:
# Select the Pe results as the table the selection cells below operate on
# (plain assignment: same object, not a copy).
current_results = results_full_pe

In [191]:
# For every (scale, model) pair keep the row(s) whose mean CV R^2 equals the maximum
# of that group (ties are all kept).
# The aggregation is named by the string 'max': passing the builtin `max` is deprecated
# in recent pandas, where it is slated to stop being mapped to the groupby max.
results_idx = (
    current_results.groupby(['scale', 'model'])['mean_cv_r2'].transform('max')
    == current_results['mean_cv_r2']
)
results_unique_models = current_results[results_idx]

In [192]:
# Rows tied on the best score within a (scale, model) group are collapsed to one:
# drop_duplicates keeps the first row for each (mean_cv_r2, model, scale) combination.
results_unique_models = results_unique_models.drop_duplicates(subset=['mean_cv_r2', 'model', 'scale'])
# Display the summary columns.
results_unique_models[columns]

Unnamed: 0,scale,model,mean_train_r2,mean_cv_r2,p-value,external_score,external_p-value
0,16-Rumination Full Scale,svr,0.098618,0.052768,0.005994,0.108485,0.024975
1,16-Rumination Full Scale,kr,0.095524,0.046113,0.011988,0.102511,0.015984
2,16-Rumination Full Scale,en,0.092549,0.047671,0.011988,0.099433,0.007992
30,05-DASS-21 Anxiety scale,svr,0.015114,-0.027174,0.126873,-0.08996,0.696304
32,05-DASS-21 Anxiety scale,en,0.026282,-0.024291,0.313686,-0.048449,0.893107
34,05-DASS-21 Anxiety scale,kr,0.053761,-0.002676,0.07992,-0.083297,0.878122
40,04-STAI Trait SUM,kr,0.046953,0.008272,0.042957,0.017547,0.155844
41,04-STAI Trait SUM,en,0.050579,0.008187,0.044955,0.017972,0.130869
42,04-STAI Trait SUM,svr,0.025869,0.006716,0.03996,0.01466,0.146853
61,07-BIS,kr,0.044284,0.009674,0.04995,0.052883,0.052947


#### Replace names of scales

In [193]:
# Map the full questionnaire names to their short labels. `replace` with a dict
# substitutes matching values wherever they occur in the frame, not only in 'scale'.
results_unique_models = results_unique_models.replace(scales_dict)

#### Leave only KR and EN models

In [194]:
# Model identifiers to keep ('en' and 'kr'); every other model row is dropped.
models = ['en', 'kr']
results_unique_models_without_svr = results_unique_models.loc[
    results_unique_models['model'].isin(models)
]

#### Save results

- all

In [197]:
# Persist the complete Pe table (all columns) as a pickle.
results_unique_models_without_svr.to_pickle("../public_data/results/models_pickles/pe_results_models_full.pkl")

- extract only most important info

In [198]:
# Essential columns for the CSV summary (scale/model first).
columns = [
    'scale',
    'model',
    'mean_train_r2',
    'mean_cv_r2',
    'p-value',
    'external_score',
    'external_p-value',
]

results_unique_models_without_svr.loc[:, columns].to_csv("../public_data/results/models_results/pe_results_essential.csv")

- extended summary of results

In [199]:
# Columns kept for the extended summary. Several of them (pipeline_name,
# pre_processed_pipeline, cv_results, parameters, best_estimator) are only needed to
# derive extra columns in the following cells.
columns_extended = [
    'scale', 'model', 'mean_train_r2', 'mean_train_mse', 'mean_train_mae',
    'mean_cv_r2', 'std_cv_r2', 'p-value', 'external_score', 'external_p-value',
    'pipeline_name', 'pre_processed_pipeline',
    'mean_cv_mae', 'std_cv_mae', 'mean_cv_mse', 'std_cv_mse',
    'parameters', 'cv_results', 'best_estimator',
]

# Take an explicit copy: the next cells add columns to `results_extended`, and assigning
# into a bare selection of another frame is the pattern SettingWithCopyWarning exists for.
results_extended = results_unique_models_without_svr[columns_extended].copy()

Add num of PCA components

In [200]:
# The value is the second '_'-separated token of each pipeline name.
results_extended['PCA components num'] = [
    name.split('_')[1] for name in results_extended['pipeline_name']
]

Add ROI

In [201]:
# ROI indicator: '1' when the first element of the pre-processing pipeline uses exactly
# the channel list below (same order), '2' otherwise.
results_extended['ROI'] = [
    '1'
    if pipeline[0].channels_list == ['Fpz', 'AFz', 'Fz', 'FCz', 'Cz', 'CPz', 'P1', 'Pz', 'P2']
    else '2'
    for pipeline in results_extended['pre_processed_pipeline']
]

Add CV splits scores

In [202]:
# Add the per-split CV R^2 of the selected parameter set as three new columns.
# Scores are collected in row order and assigned as whole columns rather than written
# with `.loc[index, column]`: a label-based write hits every row sharing that label, so
# it silently overwrites other rows whenever the index contains duplicates (which happens
# when frames with overlapping labels have been concatenated).
split_scores = {'split_1_r2': [], 'split_2_r2': [], 'split_3_r2': []}

for index, row in results_extended.iterrows():

    # Position of the chosen parameter set among the candidates stored in cv_results.
    best = row.parameters
    index_best = row.cv_results['params'].index(best)

    split_scores['split_1_r2'].append(row.cv_results['split0_test_r2'][index_best])
    split_scores['split_2_r2'].append(row.cv_results['split1_test_r2'][index_best])
    split_scores['split_3_r2'].append(row.cv_results['split2_test_r2'][index_best])

# Lists are assigned by position, so each score lands in the row it was read from.
for column_name, values in split_scores.items():
    results_extended[column_name] = values

Add linear models coefficients

In [203]:
# Build a table with (up to) the first four coefficients of each elastic-net model;
# rows for any other model type hold no coefficients.
# Rows are gathered in a list and the frame is built once: `DataFrame.append` was removed
# in pandas 2.0, and calling it per row copied the whole frame on every iteration.
coef_rows = []

for index, row in results_extended[['best_estimator']].iterrows():
    # The second pipeline step is inspected; its step name identifies the model type.
    if row.best_estimator.steps[1][0] == 'en':
        estimator = row.best_estimator.steps[1][1]
        # Truncate to four coefficients, then pad with None when there are fewer.
        coefs = list(estimator.coef_[:4])
        coefs += [None] * (4 - len(coefs))
    else:
        coefs = [None] * 4
    coef_rows.append(coefs)

coef_df = pd.DataFrame(coef_rows, columns=['coef_1', 'coef_2', 'coef_3', 'coef_4'])

In [204]:
# Attach the coefficient columns side by side. Both frames get a fresh 0..n-1 index first,
# so rows are paired by position (coef_df was built in the row order of results_extended).
results_extended = pd.concat([results_extended.reset_index(drop=True), coef_df.reset_index(drop=True)], axis=1)

In [205]:
# Remove the helper columns that were only needed to derive the columns added above.
results_extended = results_extended.drop(
    columns=['pipeline_name', 'pre_processed_pipeline', 'cv_results']
)

In [206]:
# Write the extended Pe summary to CSV.
results_extended.to_csv("../public_data/results/models_results/pe_results_extended.csv")