# Visualization of results

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np

# sns.set(rc={"figure.figsize": (20, 20)})

In [None]:
# Base palette for the bar plots: seaborn's "Paired" palette.
colors = sns.color_palette("Paired")

In [None]:
# Brighten every second palette entry (indices 0, 2, 4, ...): the strongest
# channel is kept as is and the two weaker channels are scaled up, capped at 1.
for idx in range(0, len(colors), 2):
    red, green, blue = colors[idx]
    strongest = max(red, green, blue)
    if red == strongest:
        brightened = (red, min(green * 1.2, 1), min(blue * 1.25, 1))
    elif green == strongest:
        brightened = (min(red * 1.2, 1), green, min(blue * 1.4, 1))
    else:
        brightened = (min(red * 1.25, 1), min(green * 1.1, 1), blue)
    colors[idx] = brightened

In [None]:
def bar_transparent(x, y, col, data, ci='sd', save_path="rumination_classification_summary.png"):
    """Draw a faceted bar plot whose bar colour encodes statistical significance.

    Every row of ``data`` gets a hue label: its ``x`` value when its
    ``"p-value"`` is below 0.05, otherwise ``"<x value> not significant"``.
    The labels are mapped, in a fixed order, onto the module-level ``colors``
    palette, so each model's "not significant" label takes the palette entry
    just before the one used for the model itself.

    Args:
        x: Name of the column plotted on the x axis. Its values must be
            strings; the palette only covers 'lr_en', 'svc_lin' and 'svc_rbf'.
        y: Name of the column plotted on the y axis.
        col: Name of the column used to split the plot into facets, or None
            for a single plot. Facets are wrapped after 5 columns.
        data: DataFrame holding ``x``, ``y``, ``col`` and a ``"p-value"`` column.
        ci: Error-bar setting forwarded to ``sns.catplot``.
        save_path: File the figure is written to. The default keeps the
            historical file name; pass another path to avoid overwriting the
            output of an earlier call.

    Returns:
        The object returned by ``sns.catplot``.
    """
    sns.set_style("whitegrid")
    col_wrap = None if col is None else 5

    hue_categories = ['lr_en not significant','lr_en', 'svc_lin not significant', 'svc_lin', 'svc_rbf not significant', 'svc_rbf']
    palette = dict(zip(hue_categories, colors))

    data = data.reset_index()
    hue_column = "statistical significance "
    # Build the hue labels in one vectorised step instead of a per-row apply,
    # which is slow on the heavily replicated frames this is called with.
    significant = data["p-value"] < 0.05
    labels = data[x].astype(object)
    data[hue_column] = labels.where(significant, labels + " not significant")

    grid = sns.catplot(
        x=x,
        y=y,
        col=col,
        col_order=['ICA_15_bins', 'ICA_4_bins', 'PCA_15_bins', 'PCA_4_bins', 'ERP_bins', 'ICA_15_bins_functions', 'ICA_4_bins_functions','ERP_bins_functions'],
        hue=hue_column,
        ci=ci,
        data=data,
        kind='bar',
        capsize=.05,
        errwidth=1,
        legend=True,
        col_wrap=col_wrap,
        dodge=False,
        palette=palette,
        margin_titles=True,
    )

    grid.savefig(save_path)

    return grid

In [None]:
def bar_grayed(x, y, col, data, ci='sd', save_path="rumination_classification_summary.png"):
    """Draw a faceted bar plot in which non-significant bars are gray.

    Every row of ``data`` gets a hue label: its ``x`` value when its
    ``"p-value"`` is below 0.05, otherwise ``"not significant"``. The distinct
    labels are paired, in order of first appearance, with seaborn's "dark"
    palette, and ``"not significant"`` is then forced to gray. Labels beyond
    the length of that palette receive no palette entry.

    Args:
        x: Name of the column plotted on the x axis.
        y: Name of the column plotted on the y axis.
        col: Name of the column used to split the plot into facets, or None
            for a single plot. Facets are wrapped after 4 columns.
        data: DataFrame holding ``x``, ``y``, ``col`` and a ``"p-value"`` column.
        ci: Error-bar setting forwarded to ``sns.catplot``.
        save_path: File the figure is written to. The default keeps the
            historical file name; pass another path to avoid overwriting the
            output of an earlier call.

    Returns:
        The object returned by ``sns.catplot``.
    """
    sns.set_style("whitegrid")
    col_wrap = None if col is None else 4

    gray = (0.7019607843137254, 0.7019607843137254, 0.7019607843137254)
    # Named so that it does not shadow the module-level ``colors`` palette.
    dark_colors = sns.color_palette("dark")

    data = data.reset_index()
    hue_column = "statistical significance "
    # Build the hue labels in one vectorised step instead of a per-row apply,
    # which is slow on the heavily replicated frames this is called with.
    significant = data["p-value"] < 0.05
    data[hue_column] = data[x].astype(object).where(significant, "not significant")

    unique = data[hue_column].unique()
    palette = dict(zip(unique, dark_colors))
    palette.update({"not significant": gray})
    grid = sns.catplot(
        x=x,
        y=y,
        col=col,
        col_order=['ICA_15_bins', 'ICA_4_bins', 'PCA_15_bins', 'PCA_4_bins', 'ERP_bins', 'ICA_15_bins_functions', 'ICA_4_bins_functions','ERP_bins_functions'],
        hue=hue_column,
        ci=ci,
        data=data,
        kind='bar',
        capsize=.05,
        errwidth=1,
        legend=True,
        col_wrap=col_wrap,
        dodge=False,
        palette=palette,
    )

    grid.savefig(save_path)

    return grid

In [None]:
# Locations of the pickled classification results, one file each for the
# ICA, PCA and ERP runs.
file_name_ICA = "../../data/results_classification/classification_ICA_vizualization_error.pkl"
file_name_PCA = "../../data/results_classification/classification_PCA_vizualization_error.pkl"
file_name_ERP = "../../data/results_classification/classification_ERP_vizualization_error.pkl"

# Load the three result sets in the same order as the paths above.
results_ICA, results_PCA, results_ERP = (
    pd.read_pickle(path)
    for path in (file_name_ICA, file_name_PCA, file_name_ERP)
)

In [None]:
# Stack the ICA, PCA and ERP results into one table with a fresh 0..n-1 index.
results_df = pd.concat(
    [results_ICA, results_PCA, results_ERP],
    ignore_index=True,
)

In [None]:
# results_df

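For plotting error bars - ugly hack: each result row is duplicated many times and its mean scores are replaced by random draws from a normal distribution with the stored mean and standard deviation, so that the bar plots can show standard-deviation error bars.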

In [None]:
# Number of noisy copies generated for every result row.
duplicates = 1000

# Each result row only stores a mean and a standard deviation. Repeat every row
# `duplicates` times and overwrite the mean columns with normal draws around the
# stored mean, so the bar plots (called with ci='sd') have a spread to show.
dfCopy = results_df.loc[results_df.index.repeat(duplicates)].copy()
for mean_column, std_column in (
    ('mean_cv_balanced_accuracy', 'std_cv_balanced_accuracy'),
    ('mean_cv_precision', 'std_cv_precision'),
):
    dfCopy[mean_column] = np.random.normal(
        dfCopy[mean_column].values,
        dfCopy[std_column].values,
    )

### Pipelines and Models

In [None]:
# Plot the cross-validated balanced accuracy of every model, one facet per
# pipeline, with standard-deviation error bars taken from the resampled rows.
bar_transparent(x="model", y="mean_cv_balanced_accuracy", col="pipeline_name", ci='sd', data=dfCopy)

In [None]:
# Keep only the rows whose p-value is below 0.05. The comparison is strict to
# match the threshold bar_transparent uses when it labels a row as significant;
# otherwise a row with p == 0.05 would be kept here yet drawn as "not significant".
significant_models = dfCopy[dfCopy['p-value'] < 0.05]

In [None]:
# Same plot restricted to the statistically significant rows.
# NOTE: bar_transparent saves to the same PNG file name on every call, so this
# call overwrites the image written by the previous plot.
bar_transparent(x="model", y="mean_cv_balanced_accuracy", col="pipeline_name", ci='sd', data=significant_models)

In [None]:
# summarize_df = results_df[results_df['p-value'] <= 0.05]
def _param_or_dash(params, *keys):
    """Return the first non-None value stored in ``params`` under ``keys``, else '-'."""
    for key in keys:
        value = params.get(key)
        if value is not None:
            return value
    return '-'


# Columns shown in the summary table, in display order.
columns_order = ["data_set", "pipeline_name",  "model", "mean_train_balanced_accuracy", "mean_cv_balanced_accuracy", "std_cv_balanced_accuracy", "p-value", "parameters"]
summarize_df = (
    results_df[columns_order]
    .reset_index(drop=True)
    .rename(columns={'mean_train_balanced_accuracy': 'mean train accuracy', 'mean_cv_balanced_accuracy': 'mean test accuracy', "std_cv_balanced_accuracy": "std"})
)

# Pull the interesting hyper-parameters out of the per-row parameter dict.
# The number of spatial-filter components is stored under one of two keys.
summarize_df['spatial filter components'] = summarize_df['parameters'].apply(
    lambda params: _param_or_dash(params, 'ica__n_components', 'spatial_filter__n_components')
)
# Uses the same '-' fallback as the column above, so a row without a
# feature-selection entry no longer raises KeyError.
summarize_df['selected features'] = summarize_df['parameters'].apply(
    lambda params: _param_or_dash(params, 'feature_selection__n_components')
)
summarize_df = summarize_df.drop(columns=['parameters'])

In [None]:
# Show the summary table without truncating cell contents. None removes the
# column-width limit; the older -1 spelling has been deprecated since pandas 1.0.
with pd.option_context("display.max_colwidth", None):
    display(summarize_df)