# Internal consistency

### Imports

In [None]:
import os
import re
import glob
import os
import ast
import os.path as op
from collections import defaultdict
from copy import deepcopy
import copy

import pickle
from time import time
import pywt
import mne
import scipy
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import cesium.featurize
from plotly.subplots import make_subplots
from ipywidgets import Dropdown, FloatRangeSlider, IntSlider, FloatSlider, interact
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.base import TransformerMixin, BaseEstimator

import sys

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.svm import SVR
from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA

from rumination_experiment_transformers_averaged_CDS import *

import warnings

# NOTE(review): this silences every warning raised for the rest of the session,
# including any deprecation notices from the libraries imported above —
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")


In [None]:
# Lift the row and column display limits so frames are shown in full.
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, None)
# pd.set_option('display.max_colwidth', None)

---
## Loading data

Load pickled results

In [None]:
# Columns selected whenever a results table is displayed in this notebook.
columns = [
    "mean_train_r2",
    "mean_cv_r2",
    "p-value",
    "external_score",
    "external_p-value",
    "scale",
]

In [None]:
# Full questionnaire scale names, as matched against the 'scale' column of the
# result frames. Grouped by questionnaire number.
stai_t = "04-STAI Trait SUM"
dass_anxiety = "05-DASS-21 Anxiety scale"
self_esteem = "06-Self-Esteem Scale_SES Rosenberga"

bis = "07-BIS"
bas_dzialanie = "07-BAS Dzialanie"
bas_przyjemnosc = "07-BAS Poszukiwanie przyjemnosci"
bas_nagroda = "07-BAS Wrazliwosc na nagrode"

washing = "14-Obsessive-Compulsive WASHING"
obsessing = "14-Obsessive-Compulsive OBSESSING"
hoarding = "14-Obsessive-Compulsive HOARDING"
ordering = "14-Obsessive-Compulsive ORDERING"
checking = "14-Obsessive-Compulsive CHECKING"
neutralizing = "14-Obsessive-Compulsive NEUTRALIZING"
oci_r_full = "14-Obsessive-Compulsive FULL"

threat = "15-Obsessional Beliefs - Overestimation of threat"
perfectionism_IU = "15-Obsessional Beliefs - Perfectionism/ Intolerance of uncertainty"

rumination = "16-Rumination Full Scale"
thought_suppression = "18-Thought Suppression Inventory"
nonforgivness = "22-Nonforgiveness - Full Scale"
indecisivness = "27-Indecisiveness Scale_Frost"

IU_prospecitve = "28-Intolerance of Uncertainty - Prospective Anxiety"
IU_inhibitory = "28-Intolerance of Uncertainty - Inhibitory Anxiety"

# Scales kept for the analysis below (the BAS, perfectionism, nonforgiveness
# and indecisiveness scales are defined above but not included).
scales = [
    self_esteem,
    rumination,
    dass_anxiety,
    stai_t,
    bis,
    washing,
    obsessing,
    hoarding,
    ordering,
    checking,
    neutralizing,
    oci_r_full,
    threat,
    thought_suppression,
    IU_prospecitve,
    IU_inhibitory,
]

# Short display name for every analysed scale, keyed by its full name.
scales_dict = {
    rumination: "RRQ",
    dass_anxiety: "DASS-21 Anx",
    stai_t: "STAI-T",
    bis: "BIS",
    washing: "Washing",
    obsessing: "Obsessing",
    hoarding: "Hoarding",
    ordering: "Ordering",
    checking: "Checking",
    neutralizing: "Neutralizing",
    oci_r_full: "OCI-R",
    threat: "OT",
    thought_suppression: "WBSI",
    IU_prospecitve: "IUS-P",
    IU_inhibitory: "IUS-I",
    self_esteem: "SES",
}

In [None]:
# results_ern = pd.read_pickle("../data/results/regression_union_100-600_cached_ern_amplitude_various_scales_2.pkl")
# results_pe = pd.read_pickle("../data/results/regression_union_100-600_cached_pe_amplitude_various_scales_2.pkl")
# results_plain = pd.read_pickle("../data/results/regression_union_100-600_cached_plain_pca_various_traits.pkl")

In [None]:
# Pickled result frames for the two components (ERN and Pe, going by the file
# names), each including the external p-value.
_ern_results_path = "../data/results/regression_union_100-600_cached_ern_amplitude_various_scales_with_external_p.pkl"
_pe_results_path = "../data/results/regression_union_100-600_cached_pe_amplitude_various_scales_with_external_p.pkl"

results_ern = pd.read_pickle(_ern_results_path)
results_pe = pd.read_pickle(_pe_results_path)

In [None]:
# Inspect entry 10 of the stored pre-processed pipelines in the ERN results.
results_ern["pre_processed_pipeline"][10]

In [None]:
# Work on the Pe results, restricted to the scales listed in `scales`.
current_results = results_pe.loc[results_pe["scale"].isin(scales)]

### Significant

In [None]:
# A model counts as significant when its train R² is positive, its CV R² and
# external score are non-negative, and its p-value is below 0.05.
_non_negative_fit = (
    current_results["mean_train_r2"].gt(0)
    & current_results["mean_cv_r2"].ge(0)
    & current_results["external_score"].ge(0)
)
_below_alpha = current_results["p-value"].lt(0.05)

results_significant = current_results[_non_negative_fit & _below_alpha]

In [None]:
# Display the summary columns of every significant model.
results_significant.loc[:, columns]

In [None]:
# Display the summary columns of every significant model (repeated cell).
results_significant.loc[:, columns]

In [None]:
# Per scale, keep the single significant model with the highest mean CV R².
# - "max" is the string alias pandas recommends instead of passing the built-in.
# - drop_duplicates keeps one row per scale when several models tie on the best
#   score, the same way the non-significant results are reduced further down.
_best_cv_r2 = results_significant.groupby("scale")["mean_cv_r2"].transform("max")
results_idx = _best_cv_r2 == results_significant["mean_cv_r2"]
results_significant_unique = results_significant[results_idx].drop_duplicates("scale")
results_significant_unique[columns]

----------

In [None]:
# Second pair of pickled result frames (the "5-7" files), again one per component.
_ern_iu_results_path = "../data/results/regression_union_100-600_cached_ern_amplitude_various_scales_5-7_with_external_p.pkl"
_pe_iu_results_path = "../data/results/regression_union_100-600_cached_pe_amplitude_various_scales_5-7_with_external_p.pkl"

results_ern_IU = pd.read_pickle(_ern_iu_results_path)
results_pe_IU = pd.read_pickle(_pe_iu_results_path)

In [None]:
# Result set used by the IU cells below; assign results_ern_IU here instead to
# process the ERN results.
current_IU = results_pe_IU

In [None]:
# Same significance rule as for the main results: positive train R²,
# non-negative CV R² and external score, p-value below 0.05.
_iu_non_negative_fit = (
    current_IU["mean_train_r2"].gt(0)
    & current_IU["mean_cv_r2"].ge(0)
    & current_IU["external_score"].ge(0)
)
_iu_below_alpha = current_IU["p-value"].lt(0.05)

results_significant_IU = current_IU[_iu_non_negative_fit & _iu_below_alpha]

In [None]:
# Display the summary columns of every significant IU model.
results_significant_IU.loc[:, columns]

In [None]:
# Per scale, keep the single significant IU model with the highest mean CV R².
# - "max" is the string alias pandas recommends instead of passing the built-in.
# - drop_duplicates keeps one row per scale when several models tie on the best
#   score, the same way the non-significant results are reduced further down.
_best_iu_cv_r2 = results_significant_IU.groupby("scale")["mean_cv_r2"].transform("max")
results_significant_IU_idx = _best_iu_cv_r2 == results_significant_IU["mean_cv_r2"]
results_significant_IU_unique = results_significant_IU[results_significant_IU_idx].drop_duplicates("scale")
results_significant_IU_unique[columns]

In [None]:
# Stack the best significant models of the main and the IU result sets into
# one frame with a fresh 0..n-1 index.
results_significant_df = pd.concat(
    (results_significant_unique, results_significant_IU_unique),
    ignore_index=True,
)

In [None]:
# Only for Pe: use the main result set on its own. Running this cell replaces
# the concatenated frame assigned to results_significant_df in the cell above.
results_significant_df = results_significant_unique

### Non-significant

In [None]:
# Names of the scales that already have a significant model.
significant_scales = results_significant_df["scale"].to_list()

In [None]:
# Everything that belongs to a scale without any significant model.
_has_significant_model = current_results["scale"].isin(significant_scales)
results_non_significant = current_results[~_has_significant_model]

In [None]:
# Per scale, keep one non-significant model with the highest mean CV R²
# (the first one when several tie).
_best_non_significant_cv_r2 = (
    results_non_significant.groupby("scale")["mean_cv_r2"].transform("max")
)
results_non_significant_idx = (
    _best_non_significant_cv_r2 == results_non_significant["mean_cv_r2"]
)
results_non_significant_unique = (
    results_non_significant
    .loc[results_non_significant_idx]
    .drop_duplicates("scale")
)
results_non_significant_unique.loc[:, columns]

### All results

In [None]:
# One row per scale: significant scales first, then the remaining ones.
results_all = pd.concat(
    (results_significant_df, results_non_significant_unique),
    ignore_index=True,
)

In [None]:
# Order the rows alphabetically by full scale name and renumber the index.
results_all = results_all.sort_values("scale", ignore_index=True)

In [None]:
# Display the summary columns of the combined results.
results_all.loc[:, columns]

In [None]:
# # for ERN
# results_all = results_all.reindex([0,6,7,2,1,14,4,15,5,13,11,9,3,10,12,8])

# for Pe
# results_all = results_all.reindex([4, 5,1, 2,0, 14, 13, 3,15, 12, 10, 8, 7, 9, 11, 6])

In [None]:
# Swap the full scale names for their short display names from scales_dict.
_short_scale_names = results_all["scale"].replace(scales_dict)
results_all["scale"] = _short_scale_names

In [None]:
# Display the summary columns again, now with the short scale names.
results_all.loc[:, columns]

In [None]:
# NOTE(review): the output name is hard-coded to "pe"; it has to be edited by
# hand whenever the cells above are run on the ERN results.
_summary_path = "../data/results/pe_results_with_external-p.pkl"
results_all.to_pickle(_summary_path)

# Filtered results

In [None]:
# Reload the per-scale summaries saved for each component.
_pe_summary_path = "../data/results/pe_results_with_external-p.pkl"
_ern_summary_path = "../data/results/ern_results_with_external-p.pkl"

results_all_pe = pd.read_pickle(_pe_summary_path)
results_all_ern = pd.read_pickle(_ern_summary_path)

In [None]:
# Display the summary columns of the ERN summary.
results_all_ern.loc[:, columns]

In [None]:
# Display the summary columns of the Pe summary.
results_all_pe.loc[:, columns]

In [None]:
# Component to plot: 'ern' or 'pe'. The frame is looked up from this single
# switch so the plotted data cannot disagree with `condition`, which is also
# used in the file name of the saved figure.
condition = 'ern'
current_results_all = {'ern': results_all_ern, 'pe': results_all_pe}[condition]

In [None]:
def _nudge_near_zero(score):
    """Return -0.0025 for scores with |score| < 0.002, else the score unchanged."""
    return -0.0025 if abs(score) < 0.002 else score


# Applied to both plotted scores (presumably so that bars of practically zero
# height stay visible in the chart — TODO confirm).
for _score_column in ("external_score", "mean_cv_r2"):
    current_results_all[_score_column] = current_results_all[_score_column].apply(_nudge_near_zero)

In [None]:
# Split each record into two rows for drawing: one carrying the internal
# (cross-validation) score and one carrying the external score.
#
# The rows are collected in a list and converted to a frame once.
# DataFrame.append, used here before, was deprecated in pandas 1.4 and removed
# in 2.0, and it copied the whole frame on every call.
_plot_columns = ["train_score", "test_score", "sd", "p-value", "scale", "dataset"]
_records = []

for _, row in current_results_all.iterrows():
    # internal: mean CV score with its standard deviation
    _records.append({
        "train_score": row["mean_train_r2"],
        "test_score": row["mean_cv_r2"],
        "sd": row["std_cv_r2"],
        "p-value": row["p-value"],
        "scale": row["scale"],
        "dataset": 'internal',
    })

    # external: a single score, so train score and sd are set to 0
    _records.append({
        "train_score": 0,
        "test_score": row["external_score"],
        "sd": 0,
        "p-value": row['external_p-value'],
        "scale": row["scale"],
        "dataset": 'external',
    })

# Passing the columns explicitly keeps them present even when there are no rows.
df = pd.DataFrame(_records, columns=_plot_columns)

In [None]:
# Rows that carry the external score.
df_external = df.loc[df["dataset"].eq("external")]

In [None]:
# Rows that carry the internal (cross-validation) score.
df_internal = df.loc[df["dataset"].eq("internal")]

In [None]:
# Hack for plotting error bars: the bar plot derives its error bars from the
# spread of the plotted observations, so every internal row is repeated
# `duplicates` times and its score replaced by a draw from
# Normal(test_score, sd).
duplicates = 1000

# A fixed seed keeps the simulated spread, and with it the saved figure,
# identical from run to run (the draws were unseeded before).
_rng = np.random.RandomState(0)

df_internal_copy = df_internal.loc[df_internal.index.repeat(duplicates)].copy()
df_internal_copy['test_score'] = _rng.normal(
    df_internal_copy['test_score'].values,
    df_internal_copy['sd'].values,
)

In [None]:
# Plot input: the simulated internal observations followed by the external rows.
df_all = pd.concat((df_internal_copy, df_external), ignore_index=True)

In [None]:
# Short scale names in the row order of the internal results.
labels = df_internal["scale"].to_list()

# Mathtext tick labels, one per scale, with thin spaces (\,) stretching the
# lower-case names. Raw strings are required: "\m" and "\," are not valid
# string escapes and trigger a SyntaxWarning on Python 3.12+.
labels_stretched = [
    r"$\mathregular{DASS-21 Anx}$",
    r"$\mathregular{STAI-T}$",
    r"$\mathregular{BIS}$",
    r"$\mathregular{RRQ}$",
    r"$\mathregular{WBSI}$",
    r"$\mathregular{OT}$",
    r"$\mathregular{IUS-P}$",
    r"$\mathregular{IUS-I}$",
    r"$\mathregular{OCI-R}$",
    r"$\mathregular{C\,h\,e\,c\,k\,i\,n\,g}$",
    r"$\mathregular{H\,o\,a\,r\,d\,i\,n\,g}$",
    r"$\mathregular{O\,b\,s\,e\,s\,s\,i\,n\,g}$",
    r"$\mathregular{O\,r\,d\,e\,r\,i\,n\,g}$",
    r"$\mathregular{N\,e\,u\,t\,r\,a\,l\,i\,z\,i\,n\,g}$",
    r"$\mathregular{W\,a\,s\,h\,i\,n\,g}$",
    r"$\mathregular{SES}$",
]

In [None]:
def set_ern_significance_stars():
    """Write the hand-placed significance markers for the ERN chart.

    Every marker is drawn with ``plt.text`` at fixed data coordinates and
    font size 5; ``+`` markers are additionally drawn bold. The function
    uses the module-level ``plt``, which therefore has to be imported before
    it is called.
    """
    # (x, y, marker), grouped by the scale named in the trailing comment.
    markers = [
        (-0.09, -0.04, '*'),     # DASS
        (-0.035, 0.94, '**'),    # STAI-T
        (-0.035, 1.95, '**'),    # BIS
        (-0.035, 2.95, '**'),    # RRQ
        (-.027, 3.34, '*'),      # RRQ
        (-0.052, 3.91, '+'),     # WBSI
        (-0.068, 4.95, '**'),    # OT
        (-.027, 5.32, '+'),      # OT
        (-0.075, 5.95, '**'),    # IUS-P
        (-0.027, 6.95, '*'),     # IUS-I
        (-.027, 7.34, '+'),      # IUS-I
        (-0.085, 7.95, '*'),     # OCI-R
        (-0.17, 8.95, '**'),     # checking
        (-0.045, 9.95, '**'),    # hoarding
        (-0.05, 10.95, '*'),     # obsessing
        (-0.118, 11.92, '+'),    # ordering
        (-0.125, 12.91, '+'),    # neutralizing
        # washing: no marker
        (-0.054, 14.95, '**'),   # SES
        (-.027, 15.26, '+'),     # SES
    ]

    for x_pos, y_pos, marker in markers:
        emphasis = {'weight': 'bold'} if marker == '+' else {}
        plt.text(x_pos, y_pos, marker, fontsize=5, **emphasis)

In [None]:
def set_pe_significance_stars():
    """Write the hand-placed significance markers for the Pe chart.

    Every marker is drawn with ``plt.text`` at fixed data coordinates and
    font size 5; ``+`` markers are additionally drawn bold. The function
    uses the module-level ``plt``, which therefore has to be imported before
    it is called.
    """
    # (x, y, marker), grouped by the scale named in the trailing comment.
    markers = [
        (-0.07, -0.09, '+'),     # DASS
        (-0.042, 0.95, '*'),     # STAI-T
        (-0.029, 1.96, '*'),     # BIS
        (-0.029, 2.3, '+'),      # BIS
        (-0.035, 2.95, '**'),    # RRQ
        (-.029, 3.34, '*'),      # RRQ
        (-0.029, 3.9, '+'),      # WBSI
        (-0.029, 4.35, '*'),     # WBSI
        # OT: no marker
        (-0.034, 5.95, '**'),    # IUS-P
        (-0.034, 6.35, '**'),    # IUS-P
        # IUS-I, OCI-R: no marker
        (-0.029, 8.95, '*'),     # checking
        (-0.035, 9.95, '*'),     # hoarding
        (-0.061, 10.95, '*'),    # obsessing
        # ordering, neutralizing: no marker
        (-0.24, 13.97, '*'),     # washing
        (-0.062, 14.95, '*'),    # SES
    ]

    for x_pos, y_pos, marker in markers:
        emphasis = {'weight': 'bold'} if marker == '+' else {}
        plt.text(x_pos, y_pos, marker, fontsize=5, **emphasis)

In [None]:
import seaborn as sns
import matplotlib.pylab as plt

# Top-to-bottom order of the scales on the y axis; labels_stretched lists the
# same scales in the same order.
order_list = ['DASS-21 Anx', 'STAI-T', 'BIS', 'RRQ',
              'WBSI', 'OT', 'IUS-P', 'IUS-I', 'OCI-R',
              'Checking', 'Hoarding', 'Obsessing', 'Ordering',
              'Neutralizing', 'Washing', 'SES']


sns.set_style("white")

cm = 1/2.54  # one centimetre in inches (figure sizes are given in inches)
dpi = 200

plt.rcParams.update({
    'figure.dpi': dpi,
    'figure.figsize': [9*cm, 12*cm],
    "font.family": "Times New Roman",
    'ytick.labelsize': 5,
    'xtick.labelsize': 5,
    'axes.labelsize': 6,
    "axes.edgecolor": ".15",
    "axes.linewidth": 0.3,
})

# Turn the bottom/left tick marks on for the current axes (the bar plot below
# is drawn without an explicit `ax`, i.e. onto the current axes as well).
plt.tick_params(axis='both', which='major', bottom=True, left=True)


# Two colours of the 'deep' palette (entries 3 and 2), one per dataset hue.
pal = sns.color_palette('deep')
_deep_hex = pal.as_hex()
colors = [_deep_hex[3], _deep_hex[2]]

sns.set_palette(sns.color_palette(colors))

# NOTE(review): `ci=` and `errwidth=` are deprecated in newer seaborn releases
# (replaced by `errorbar='sd'` and `err_kws=`); they are kept so the cell still
# runs on the seaborn version the notebook was written for.
ax = sns.barplot(
        y='scale',
        x='test_score',
        hue='dataset',
        order=order_list,
        data=df_all,
        orient='h',
        ci='sd',
        capsize=.1,
        errwidth=0.7,
        ec='k',
        linewidth=0.3
)

# Raw string: "\m" is not a valid string escape (SyntaxWarning on Python 3.12+).
ax.set(xlabel=r"$\mathregular{R^{2}}$", ylabel='')

plt.legend(fontsize=5)
# One tick per label, so ticks and labels cannot get out of step.
plt.yticks(list(range(len(labels_stretched))), labels_stretched)

# Pe
# set_pe_significance_stars()

# ERN
# set_ern_significance_stars()

# Save before showing: some backends release the figure once show() returns.
os.makedirs("paper_images", exist_ok=True)
ax.figure.savefig(f"paper_images/{condition}_scores-detailed_summary_no-p_dpi_{dpi}.png", bbox_inches='tight')

plt.show()