In [73]:
import os
import pandas as pd
import numpy as np

from torch import nn
import torch
from model_attn_seq import dataloader, get_dataframe

In [74]:
# Locate the training TSV under ./data; the relative path is resolved against
# the current working directory by os.path.abspath.
data_dir = os.path.abspath('./data')
train_fp = os.path.join(data_dir, 'phase1.train.shuf.tsv')
# get_dataframe comes from model_attn_seq; presumably it parses the TSV into a
# pandas DataFrame -- TODO confirm against that module.
df = get_dataframe(train_fp)

In [75]:
# Collect every experiment's prediction table from ./output, keyed by the
# experiment's sub-directory name.
pred_frames = {}
# NOTE(review): vocabs / clf_log_probs are initialised here but never filled
# in this cell -- kept in case other cells rely on them.
vocabs = {}
clf_log_probs = {}
dir_output = './output'
# os.listdir returns entries in arbitrary order; sorting keeps the experiment
# order (and therefore the row order of every table derived from it) stable
# between runs.
for subdir in sorted(os.listdir(dir_output)):
    # Sub-directories whose name contains 'boost' are left out of the comparison.
    if 'boost' in subdir:
        continue
    fp = os.path.join(dir_output, subdir, 'pred_frame.csv')
    # Entries without a prediction file are skipped.
    if not os.path.exists(fp):
        continue
    # Store the frame directly rather than through `df`: that name already
    # holds the training data loaded earlier and must not be overwritten here.
    pred_frames[subdir] = pd.read_csv(fp)

In [76]:
from sklearn import metrics

# Column layout of the summary table. Passing it explicitly keeps the columns
# present even when no experiment was loaded, so cells that derive metrics from
# them do not fail with a KeyError on an empty table.
stat_columns = ['tn', 'fp', 'fn', 'tp', 'auc', 'exp']

# One dict of confusion counts + AUC per experiment; turned into a frame once
# at the end instead of reusing a single name for both the list and the frame.
stats_rows = []
for key, pred_frame in pred_frames.items():
    if 'score' not in pred_frame.columns:
        # Experiments without a combined score are expected to carry the
        # per-fold columns score-f0 .. score-f4; their row-wise mean is used.
        fold_columns = [f'score-f{fold}' for fold in range(5)]
        # Written back onto the shared frame on purpose: later cells read
        # pred_frames[key]['score'].
        pred_frame['score'] = pred_frame[fold_columns].mean(axis=1)

    y_true, y_prob = pred_frame['assessment'], pred_frame['score']
    x, y, _ = metrics.roc_curve(y_true, y_prob, pos_label=1)
    auc = metrics.auc(x, y)

    # Labels are compared against -1 / 1; a score of 0.5 or more counts as a
    # positive prediction. Both sides of the threshold are spelled out so the
    # masks match the `<` / `>=` comparisons exactly.
    predicted_neg, predicted_pos = y_prob < 0.5, y_prob >= 0.5
    actual_neg, actual_pos = y_true == -1, y_true == 1

    # Series.sum() counts the True entries without a Python-level loop.
    stats_rows.append({
        'tn': int((predicted_neg & actual_neg).sum()),
        'fp': int((predicted_pos & actual_neg).sum()),
        'fn': int((predicted_neg & actual_pos).sum()),
        'tp': int((predicted_pos & actual_pos).sum()),
        'auc': auc,
        'exp': key,
    })
stats_frame = pd.DataFrame(stats_rows, columns=stat_columns)

In [77]:
# Rate metrics derived from the confusion counts of each experiment.
stats_frame['specificity'] = stats_frame['tn'].div(stats_frame['tn'].add(stats_frame['fp']))
stats_frame['precision'] = stats_frame['tp'].div(stats_frame['tp'].add(stats_frame['fp']))
stats_frame['recall'] = stats_frame['tp'].div(stats_frame['tp'].add(stats_frame['fn']))

# F1 is the harmonic mean of precision and recall.
_precision, _recall = stats_frame['precision'], stats_frame['recall']
stats_frame['f1'] = (2 * _precision * _recall) / (_precision + _recall)

# 0/0 ratios (e.g. precision when nothing was predicted positive) come out as
# NaN; they are reported as 0.
stats_frame.fillna(0, inplace=True)

In [78]:
# Widen text columns so long experiment names are shown in full. The fully
# qualified option name is used because pandas resolves abbreviated names by
# pattern matching, which can become ambiguous when new options are added.
pd.set_option('display.max_colwidth', 500)

In [79]:
# Five experiments with the fewest false negatives (head() defaults to 5 rows).
stats_frame.sort_values('fn').head()

Unnamed: 0,tn,fp,fn,tp,auc,exp,specificity,precision,recall,f1
55,3740,960,15,135,0.926218,search-complement-sz-1000-up-500-ngrams-1-tf-15,0.795745,0.123288,0.9,0.216867
13,3190,1510,16,134,0.877879,search-complement-sz-1000-up-500-ngrams-2-tf-100,0.678723,0.081509,0.893333,0.149387
26,3728,972,17,133,0.916441,search-complement-sz-2500-up-250-ngrams-1-tf-100,0.793191,0.120362,0.886667,0.211952
89,3817,883,17,133,0.918262,search-complement-sz-1000-up-500-ngrams-2-tf-15,0.812128,0.130906,0.886667,0.22813
117,3809,891,17,133,0.92042,search-complement-sz-1000-up-250-ngrams-2-tf-15,0.810426,0.129883,0.886667,0.226576


In [68]:
# Five experiments with the highest precision (head() defaults to 5 rows).
stats_frame.sort_values('precision', ascending=False).head()

Unnamed: 0,tn,fp,fn,tp,auc,exp,specificity,precision,recall,f1
128,4700,0,147,3,0.921821,model-seq-attn-seq-8-emb-64-h-32-ngrams-1,1.0,1.0,0.02,0.039216
131,4700,0,148,2,0.913489,model-seq-attn-seq-8-emb-128-h-32-ngrams-1,1.0,1.0,0.013333,0.026316
81,4700,0,145,5,0.900763,model-mlp-32-cross-fold-bsz-128-ngrams-1-tf-15,1.0,1.0,0.033333,0.064516
100,4700,0,148,2,0.923784,model-seq-attn-seq-8-emb-128-h-64-ngrams-1,1.0,1.0,0.013333,0.026316
1,4700,0,147,3,0.926509,model-seq-attn-seq-8-32-bsz-64-ngrams-1-tf-15,1.0,1.0,0.02,0.039216


In [80]:
# Five experiments with the highest F1 (head() defaults to 5 rows).
stats_frame.sort_values('f1', ascending=False).head()

Unnamed: 0,tn,fp,fn,tp,auc,exp,specificity,precision,recall,f1
77,4644,56,85,65,0.921058,model-mlp-author-h-32-ngrams-1-w-100,0.988085,0.53719,0.433333,0.479705
34,4521,179,49,101,0.922516,model-mlp-32-sz-2500-ngrams-1-3-tf-15,0.961915,0.360714,0.673333,0.469767
3,4532,168,59,91,0.924294,search-complement-sz-1000-up-500-ngrams-1-tf-1,0.964255,0.351351,0.606667,0.444988
80,4487,213,51,99,0.939088,model-mlp-32-sz-2500-ngrams-1-3-5-tf-15,0.954681,0.317308,0.66,0.428571
130,4608,92,84,66,0.837955,model-seq-attn-pos-cf-80-20-seq-4-emb-256-h-32-ngrams-1-w-100,0.980426,0.417722,0.44,0.428571


In [136]:
# Names of the five experiments with the best F1, best first.
subdirs = stats_frame.sort_values('f1', ascending=False)['exp'].head(5).tolist()

# For each of those experiments (keyed by rank 0..4) record which rows fall
# into each confusion cell.
fps, tns, fns, tps = {}, {}, {}, {}
for i, subdir in enumerate(subdirs):
    pred_frame = pred_frames[subdir]
    score, label = pred_frame['score'], pred_frame['assessment']
    below, above = score < 0.5, score >= 0.5
    negative, positive = label == -1, label == 1

    fp = above & negative
    fn = below & positive
    tp = above & positive
    tn = below & negative

    # The sets hold row positions, not index labels.
    # NOTE(review): comparing them across experiments assumes every prediction
    # file lists the same examples in the same order -- confirm.
    fps[i] = set(np.flatnonzero(fp.values))
    tns[i] = set(np.flatnonzero(tn.values))
    fns[i] = set(np.flatnonzero(fn.values))
    tps[i] = set(np.flatnonzero(tp.values))

In [137]:
from itertools import combinations

# For every confusion cell and every pair of experiments, measure how much the
# two row sets overlap: shared rows, combined rows, and rows unique to each side.
rows = [
    {
        'exp_0': first,
        'exp_1': second,
        'cond': cond_label,
        'intersection': len(cond[first] & cond[second]),
        'union': len(cond[first] | cond[second]),
        'difference_0': len(cond[first] - cond[second]),
        'difference_1': len(cond[second] - cond[first]),
    }
    for cond_label, cond in (('fp', fps), ('tn', tns), ('fn', fns), ('tp', tps))
    for first, second in combinations(cond, 2)
]
combo_df = pd.DataFrame(rows)

In [143]:
# Render the false-positive overlap rows as a LaTeX table; print() shows the
# markup as raw text instead of a quoted string repr.
fp_overlap = combo_df.loc[combo_df['cond'] == 'fp']
print(fp_overlap.to_latex(index=False))

\begin{tabular}{rrlrrrr}
\toprule
 exp\_0 &  exp\_1 & cond &  intersection &  union &  difference\_0 &  difference\_1 \\
\midrule
     0 &      1 &   fp &            34 &    201 &            22 &           145 \\
     0 &      2 &   fp &            34 &    190 &            22 &           134 \\
     0 &      3 &   fp &            42 &    227 &            14 &           171 \\
     0 &      4 &   fp &            40 &    108 &            16 &            52 \\
     1 &      2 &   fp &            75 &    272 &           104 &            93 \\
     1 &      3 &   fp &            90 &    302 &            89 &           123 \\
     1 &      4 &   fp &            51 &    220 &           128 &            41 \\
     2 &      3 &   fp &            83 &    298 &            85 &           130 \\
     2 &      4 &   fp &            44 &    216 &           124 &            48 \\
     3 &      4 &   fp &            58 &    247 &           155 &            34 \\
\bottomrule
\end{tabular}



In [144]:
# Names of the ten experiments with the best F1, best first.
keys = stats_frame.sort_values(by='f1', ascending=False).head(10)['exp'].values.tolist()

# Build one wide frame with a score-ens-<rank> column per selected experiment.
# None marks "nothing merged yet"; testing for an empty frame instead would
# also trigger when a frame is legitimately empty and silently discard the
# columns accumulated so far.
ens_frame = None
for i, key in enumerate(keys):
    # reset_index() exposes the row index as an 'index' column used as join key.
    # NOTE(review): this assumes every prediction file has the same rows in the
    # same order -- confirm.
    pred_frame = (
        pred_frames[key]
        .reset_index()[['index', 'assessment', 'score']]
        # rename returns a new frame, so the column slice is not modified in place
        .rename(columns={'score': f'score-ens-{i}'})
    )
    if ens_frame is None:
        ens_frame = pred_frame
    else:
        # merge defaults to an inner join: rows missing from any experiment are dropped
        ens_frame = ens_frame.merge(pred_frame, on=['index', 'assessment'])

# With no experiments selected, fall back to an empty frame.
if ens_frame is None:
    ens_frame = pd.DataFrame([])

In [145]:
# Experiment names selected for the ensemble, ordered by descending F1.
keys

['model-mlp-author-h-32-ngrams-1-w-100',
 'model-mlp-32-sz-2500-ngrams-1-3-tf-15',
 'search-complement-sz-1000-up-500-ngrams-1-tf-1',
 'model-mlp-32-sz-2500-ngrams-1-3-5-tf-15',
 'model-seq-attn-pos-cf-80-20-seq-4-emb-256-h-32-ngrams-1-w-100',
 'model-mlp-32-sz-2500-lr-decay-0.1',
 'model-mlp-32-sz-2500-ngrams-1-3-5-9-tf-15',
 'baseline-bernoulli',
 'model-seq-attn-author-bin-256-seq-4-emb-256-h-32-ngrams-1-w-100',
 'model-seq-attn-seq-4-emb-256-h-32-ngrams-1-w-1000']

In [149]:
# Ensemble score = unweighted mean of the selected experiments' scores.
ens_columns = [f'score-ens-{i}' for i in range(len(keys))]
ens_frame['score'] = ens_frame[ens_columns].mean(axis=1)

y_true, y_prob = ens_frame['assessment'], ens_frame['score']
x, y, _ = metrics.roc_curve(y_true, y_prob, pos_label=1)
auc = metrics.auc(x, y)

# Confusion masks: labels are -1 / 1, a score of 0.5 or more is a positive call.
below, above = y_prob < 0.5, y_prob >= 0.5
negative, positive = y_true == -1, y_true == 1
tn = below & negative
fp = above & negative
fn = below & positive
tp = above & positive

row = {
    'tn': int(tn.sum()),
    'fp': int(fp.sum()),
    'fn': int(fn.sum()),
    'tp': int(tp.sum()),
    'auc': auc,
    'exp': 'top-10',
}
ens_stats = pd.DataFrame([row])

In [150]:
# Rate metrics for the ensemble row, derived from its confusion counts.
tn_count, fp_count, fn_count, tp_count = (ens_stats[name] for name in ('tn', 'fp', 'fn', 'tp'))
ens_stats['specificity'] = tn_count / (tn_count + fp_count)
ens_stats['precision'] = tp_count / (tp_count + fp_count)
ens_stats['recall'] = tp_count / (tp_count + fn_count)
ens_stats['f1'] = (
    (2 * ens_stats['precision'] * ens_stats['recall'])
    / (ens_stats['precision'] + ens_stats['recall'])
)
# Undefined (0/0) ratios are reported as 0.
ens_stats.fillna(0, inplace=True)

In [156]:
# Single-row summary (confusion counts, AUC and derived rates) for the ensemble.
ens_stats

Unnamed: 0,tn,fp,fn,tp,auc,exp,specificity,precision,recall,f1
0,4630,70,62,88,0.951688,top-10,0.985106,0.556962,0.586667,0.571429
