In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%pylab inline

In [None]:
import os
import sys
import pathlib
import pickle

sys.path.append('..')

In [None]:
import seaborn as sns
import matplotlib.style as style
import pandas as pd
from sklearn.metrics import auc

In [None]:
from pals.evaluation import evaluate_performance, compute_pr_curve, get_method_true_answers, get_auc_for_hat_data
from pals.common import *

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names. Prefer the new name when this
# Matplotlib install provides it and fall back to the legacy name otherwise,
# so the cell runs on both old and new environments.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in style.available else 'seaborn-poster'
style.use(poster_style) # sets the size of the charts
sns.set_context('poster') # everything is larger

# HAT Data Evaluation

This notebook computes precision, recall and F1 scores, along with AUC values, on the HAT data for the manuscript.

In [None]:
# Folder that holds the pickled HAT results. The path is relative, so it is
# resolved against the kernel's current working directory.
base_dir = os.path.join('test_data', 'HAT')

In [None]:
# Read the saved results; the plasma and CSF cells below index this object by
# experiment name. NOTE(review): load_obj is not imported explicitly here, so
# it presumably comes from the `pals.common` star import -- confirm.
results_path = os.path.join(base_dir, 'HAT_results.p')
results = load_obj(results_path)

In [None]:
# set_log_level_info()

In [None]:
# N is forwarded to get_method_true_answers() and evaluate_performance() in
# the cells below. NOTE(review): None presumably means "no limit" -- confirm
# against pals.evaluation.
N = None

### Plasma

In [None]:
# Select the plasma experiment. `res` and `method_true_answers` are reused by
# the plasma evaluation and AUC cells that follow.
experiment_name = 'plasma'
res = results[experiment_name]
method_true_answers = get_method_true_answers(res, N=N)

In [None]:
# Evaluate the plasma results; later cells read the 'method', 'proportion',
# 'F1', 'TP', 'FP' and 'FN' columns of the returned frame. N is passed by
# keyword, as in the CSF evaluation cell, so it cannot silently bind to a
# different positional parameter.
plasma_f1_df = evaluate_performance(res, method_true_answers, N=N)

In [None]:
# Summary statistics of F1 per proportion, restricted to the PALS rows (plasma).
plasma_f1_df.loc[plasma_f1_df['method'].eq('PALS')].groupby('proportion').describe()['F1']

In [None]:
# Summary statistics of F1 per proportion, restricted to the ORA rows (plasma).
plasma_f1_df.loc[plasma_f1_df['method'].eq('ORA')].groupby('proportion').describe()['F1']

In [None]:
# Summary statistics of F1 per proportion, restricted to the GSEA rows (plasma).
plasma_f1_df.loc[plasma_f1_df['method'].eq('GSEA')].groupby('proportion').describe()['F1']

In [None]:
# Box plot of F1 per proportion, one box per method, for the plasma data.
sns.boxplot(x='proportion', y='F1', hue='method', hue_order=['ORA', 'GSEA', 'PALS'], data=plasma_f1_df)
plt.legend(loc='upper left')
plt.title('Pathway Ranking Performance (Plasma)')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_plasma.eps'), dpi=300)

In [None]:
# Compute AUC values for the plasma results; the box plot below reads the
# 'proportion', 'method' and 'auc' columns of the returned frame.
plasma_auc_df = get_auc_for_hat_data(res, method_true_answers)

In [None]:
# Box plot of AUC per proportion, one box per method, for the plasma data.
sns.boxplot(x='proportion', y='auc', hue='method', hue_order=['ORA', 'GSEA', 'PALS'], data=plasma_auc_df)
plt.legend(loc='upper left')
plt.title('AUC Performance (Plasma)')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'auc_plasma.eps'), dpi=300)

### CSF

In [None]:
# Switch to the CSF experiment. This rebinds `experiment_name`, `res` and
# `method_true_answers`, so the plasma values are no longer available under
# those names after this cell runs.
experiment_name = 'csf'
res = results[experiment_name]
method_true_answers = get_method_true_answers(res, N=N)

In [None]:
# Evaluate the CSF results; later cells read the 'method', 'proportion', 'F1',
# 'TP', 'FP' and 'FN' columns of the returned frame.
csf_f1_df = evaluate_performance(res, method_true_answers, N=N)

In [None]:
# Summary statistics of F1 per proportion, restricted to the PALS rows (CSF).
csf_f1_df.loc[csf_f1_df['method'].eq('PALS')].groupby('proportion').describe()['F1']

In [None]:
# Summary statistics of F1 per proportion, restricted to the ORA rows (CSF).
csf_f1_df.loc[csf_f1_df['method'].eq('ORA')].groupby('proportion').describe()['F1']

In [None]:
# Summary statistics of F1 per proportion, restricted to the GSEA rows (CSF).
csf_f1_df.loc[csf_f1_df['method'].eq('GSEA')].groupby('proportion').describe()['F1']

In [None]:
# Box plot of F1 per proportion, one box per method, for the CSF data.
sns.boxplot(x='proportion', y='F1', hue='method', hue_order=['ORA', 'GSEA', 'PALS'], data=csf_f1_df)
plt.title('Pathway Ranking Performance (CSF)')
plt.legend(loc='upper left')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_csf.eps'), dpi=300)

In [None]:
# Compute AUC values for the CSF results; the box plot below reads the
# 'proportion', 'method' and 'auc' columns of the returned frame.
csf_auc_df = get_auc_for_hat_data(res, method_true_answers)

In [None]:
# Box plot of AUC per proportion, one box per method, for the CSF data.
sns.boxplot(x='proportion', y='auc', hue='method', hue_order=['ORA', 'GSEA', 'PALS'], data=csf_auc_df)
plt.legend(loc='upper left')
# This figure plots csf_auc_df and is saved as auc_csf.eps, so it is titled CSF
# (the title previously said "Plasma", a copy-paste slip).
plt.title('AUC Performance (CSF)')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'auc_csf.eps'), dpi=300)

### Combined Plot

In [None]:
# Tag each frame with its sample type so the combined plot can facet on the
# 'data' column. This adds the column to the existing frames in place.
plasma_f1_df['data'] = 'Plasma'
csf_f1_df['data'] = 'CSF'

In [None]:
# Stack the plasma and CSF rows into one frame; each frame's original row
# index is kept, so index labels may repeat.
df = pd.concat([plasma_f1_df, csf_f1_df])
df.shape  # (rows, columns) of the combined frame

In [None]:
# Display the combined frame for a visual check.
df

In [None]:
# Side-by-side F1 box plots, one panel per sample type (the 'data' column).
g = sns.catplot(x="proportion", y="F1", hue="method", hue_order=['ORA', 'GSEA', 'PALS'], col="data", data=df, kind="box", height=10, aspect=0.7)
plt.suptitle('Pathway Ranking Performance')
# Reserve space at the top of the figure so the suptitle does not overlap the panels.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation.eps'), dpi=300)

#### True Positives

In [None]:
# Count plot of true-positive values per method, one panel per proportion (plasma).
g = sns.catplot(x="TP", col='proportion', hue="method", data=plasma_f1_df, kind="count", height=10, aspect=1.2, legend=False)
plt.suptitle('True Positive Count (Plasma)', fontsize=48)
# Reserve space at the top of the figure for the large suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.90])
# catplot's own legend is disabled above; draw one on the current axes instead.
plt.legend(loc='upper right')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_plasma_TP.eps'), dpi=300)

In [None]:
# Count plot of true-positive values per method, one panel per proportion (CSF).
g = sns.catplot(x="TP", col='proportion', hue="method", data=csf_f1_df, kind="count", height=10, aspect=1.2, legend=False)
plt.suptitle('True Positive Count (CSF)', fontsize=48)
# Reserve space at the top of the figure for the large suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.90])
# catplot's own legend is disabled above; draw one on the current axes instead.
plt.legend(loc='upper right')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_csf_TP.eps'), dpi=300)

#### False Positives

In [None]:
# Count plot of false-positive values per method, one panel per proportion (plasma).
g = sns.catplot(x="FP", col='proportion', hue="method", data=plasma_f1_df, kind="count", height=10, aspect=1.2, legend=False)
plt.suptitle('False Positive Count (Plasma)', fontsize=48)
# Reserve space at the top of the figure for the large suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.90])
# catplot's own legend is disabled above; draw one on the current axes instead.
plt.legend(loc='upper right')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_plasma_FP.eps'), dpi=300)

In [None]:
# Count plot of false-positive values per method, one panel per proportion (CSF).
g = sns.catplot(x="FP", col='proportion', hue="method", data=csf_f1_df, kind="count", height=10, aspect=1.2, legend=False)
plt.suptitle('False Positive Count (CSF)', fontsize=48)
# Reserve space at the top of the figure for the large suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.90])
# catplot's own legend is disabled above; draw one on the current axes instead.
plt.legend(loc='upper right')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_csf_FP.eps'), dpi=300)

#### False Negatives

In [None]:
# Count plot of false-negative values per method, one panel per proportion (plasma).
g = sns.catplot(x="FN", col='proportion', hue="method", data=plasma_f1_df, kind="count", height=10, aspect=1.2, legend=False)
plt.suptitle('False Negative Count (Plasma)', fontsize=48)
# Reserve space at the top of the figure for the large suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.90])
# catplot's own legend is disabled above; draw one on the current axes instead.
plt.legend(loc='upper right')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_plasma_FN.eps'), dpi=300)

In [None]:
# Count plot of false-negative values per method, one panel per proportion (CSF).
g = sns.catplot(x="FN", col='proportion', hue="method", data=csf_f1_df, kind="count", height=10, aspect=1.2, legend=False)
plt.suptitle('False Negative Count (CSF)', fontsize=48)
# Reserve space at the top of the figure for the large suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.90])
# catplot's own legend is disabled above; draw one on the current axes instead.
plt.legend(loc='upper right')
# Write the figure into the configured HAT folder instead of re-spelling the path.
plt.savefig(os.path.join(base_dir, 'evaluation_csf_FN.eps'), dpi=300)