# Displaying Experiment Results after Evaluation (loading from dataframe)

In [1]:
#Basics
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

#Evaluation
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, recall_score, f1_score, precision_score

#PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import SubsetRandomSampler

#Torchvision
import torchvision
from torchvision import transforms, datasets

#Data
from FRDEEP import FRDEEPF
from MiraBest import MiraBest_full

#My Functions / Models
from models_new import *
from utils import *

#Other
import pickle
from tqdm import tqdm
import PIL
from torchsummary import summary
from models.networks_other import init_weights

In [2]:
# Datasets the models were evaluated on. The final entry ('automatic') is
# excluded by the [:-1] slice in the loop below, so no pickle is read for it.
data_options = ['FRDEEP-F','MiraBest','MiraBestNOHYBRID','MiraBestNOUNC','automatic']

# The data in the dictionaries is in the format (form); these become the column names.
form = ['data_train','net_name','date','lr','epoch','auc','friP','friN','friiN','friiP','frirecall','friirecall','friprecision','friiprecision','frif1','friif1','fpr','tpr','thresholds']

# Build one frame per evaluation dataset and concatenate them once at the end.
# `df` is always rebuilt from scratch, so re-running the cell never stacks onto
# a frame left over from a previous execution.
frames = []
for data in data_options[:-1]:
    # NOTE(review): pickle.load can execute arbitrary code - only read trusted files.
    with open(f'evaluated_models_{data}.pickle','rb') as pickle_in:
        evaluated_models = pickle.load(pickle_in)

    # One row per dictionary key, columns named after `form`.
    df_temp = pd.DataFrame.from_dict(evaluated_models,orient='index',columns=form)

    # Keep the dictionary keys as a 'model' column and record the evaluation
    # dataset; the 'MiraBest' file is labelled 'MiraBestHYBRID'.
    data_eval = f'{data}HYBRID' if data == 'MiraBest' else f'{data}'
    df_temp = df_temp.assign(model=df_temp.index, data_eval=data_eval)
    frames.append(df_temp)

# Stack all frames and replace the model-name index with a plain numeric one.
df = pd.concat(frames, axis=0, ignore_index=True)

In [3]:
# Accuracy column: (friP + friiP) divided by the sum of all four count columns.
n_hits = df['friP'] + df['friiP']
df['accuracy'] = n_hits / (n_hits + df['friiN'] + df['friN'])

In [4]:
# Order every run by accuracy, then AUC, highest first.
df = df.sort_values(by=['accuracy', 'auc'], ascending=False)

# Ten best runs evaluated on FRDEEP-F that were not trained on FRDEEP-F,
# leaving out the 'transfer_original' network.
cross_eval_mask = (
    (df['data_train'] != 'FRDEEP-F')
    & (df['net_name'] != 'transfer_original')
    & (df['data_eval'] == 'FRDEEP-F')
)
summary_columns = ['auc','accuracy','data_train','data_eval','net_name','model','date']
df.loc[cross_eval_mask, summary_columns].head(10)

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
125,0.896116,0.855389,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0624Exp2-softmax-MiraBestNOHYBRIDAdam-AGRadGal...,0624Exp2
115,0.849442,0.853944,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0622Exp1-tanh-MiraBestNOHYBRIDAdam-AGRadGalNet...,0622Exp1
116,0.846905,0.847333,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0624Exp2-std_mean_norm-MiraBestNOHYBRIDAdam-AG...,0624Exp2
104,0.831645,0.836611,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0622Exp1-sigmoid-MiraBestNOHYBRIDAdam-AGRadGal...,0622Exp1
127,0.930676,0.835389,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0709Exp2-softmax-MiraBestNOHYBRIDAdam-AGRadGal...,0709Exp2
103,0.846351,0.834667,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0624Exp2-range_norm-MiraBestNOHYBRIDAdam-AGRad...,0624Exp2
119,0.844833,0.832722,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0624Exp2-tanh-MiraBestNOHYBRIDAdam-AGRadGalNet...,0624Exp2
123,0.830156,0.828056,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0624Exp2-tanh-MiraBestNOHYBRIDAdam-AGRadGalNet...,0624Exp2
108,0.822033,0.826667,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0624Exp2-std_mean_norm-MiraBestNOHYBRIDAdam-AG...,0624Exp2
102,0.825743,0.825222,MiraBestNOHYBRID,FRDEEP-F,AGRadGalNet,0624Exp2-sigmoid-MiraBestNOHYBRIDAdam-AGRadGal...,0624Exp2


# Preparing the Dataframes

In [5]:
def _prepare_experiment(results, date):
    """Return a copy of the rows of `results` whose `date` column equals `date`, with derived columns.

    Columns added, in this order:
      normalisation -- second '-'-separated field of the `model` string
      aggregation   -- third-from-last '-'-separated field of `model`, minus its first 11 characters
      f1, precision, recall -- unweighted mean of the matching fri*/frii* columns
    """
    exp = results.loc[results.date == date].copy()
    model_fields = exp.model.str.split('-')
    exp['normalisation'] = model_fields.str[1]
    # NOTE(review): the 11-character offset presumably strips a fixed-length
    # network-name prefix from the field - confirm against the model names.
    exp['aggregation'] = model_fields.str[-3].str[11:]
    for metric in ('f1', 'precision', 'recall'):
        exp[metric] = (exp[f'fri{metric}'] + exp[f'frii{metric}']) / 2
    return exp


# Experiment 1: class-averaged metrics, including the aggregation column.
exp1 = _prepare_experiment(df, '0709Exp1')
view1 = exp1[['data_eval',
              'normalisation',
              'aggregation',
              'accuracy',
              'auc',
              'f1',
              'precision',
              'recall'
             ]]

# Experiment 2: per-class (FRI / FRII) metrics; aggregation is not part of this view.
exp2 = _prepare_experiment(df, '0709Exp2')
view2 = exp2[['data_eval',
              'normalisation',
              'accuracy',
              'auc',
              'frif1','friif1',
              'friprecision','friiprecision',
              'frirecall','friirecall'
             ]]

# Show which columns the preparation step added on top of the original frame.
print('Original keys: \n', df.keys())
print('Evaluation keys: \n', exp1.keys())

Original keys: 
 Index(['data_train', 'net_name', 'date', 'lr', 'epoch', 'auc', 'friP', 'friN',
       'friiN', 'friiP', 'frirecall', 'friirecall', 'friprecision',
       'friiprecision', 'frif1', 'friif1', 'fpr', 'tpr', 'thresholds', 'model',
       'data_eval', 'accuracy'],
      dtype='object')
Evaluation keys: 
 Index(['data_train', 'net_name', 'date', 'lr', 'epoch', 'auc', 'friP', 'friN',
       'friiN', 'friiP', 'frirecall', 'friirecall', 'friprecision',
       'friiprecision', 'frif1', 'friif1', 'fpr', 'tpr', 'thresholds', 'model',
       'data_eval', 'accuracy', 'normalisation', 'aggregation', 'f1',
       'precision', 'recall'],
      dtype='object')


# Experiment 1 Results:

# TODO:
- Add columns for normalisation and aggregation
- Add columns for performance ratio on other datasets?

In [6]:
# Experiment 1 runs ordered by normalisation, then AUC (both descending), rounded to 2 d.p.
(
    view1
    .sort_values(by=['normalisation', 'auc'], ascending=False)
    .round(2)
)

Unnamed: 0,data_eval,normalisation,aggregation,accuracy,auc,f1,precision,recall
574,MiraBestNOUNC,range_norm,ft,0.93,0.97,0.92,0.94,0.92
580,MiraBestNOUNC,range_norm,ft,0.92,0.96,0.92,0.92,0.92
286,MiraBestHYBRID,range_norm,ft,0.92,0.96,0.92,0.92,0.92
280,MiraBestHYBRID,range_norm,ft,0.84,0.92,0.84,0.85,0.84
427,MiraBestNOHYBRID,range_norm,ft,0.84,0.92,0.84,0.86,0.84
433,MiraBestNOHYBRID,range_norm,ft,0.84,0.9,0.84,0.84,0.84
146,FRDEEP-F,range_norm,ft,0.89,0.9,0.88,0.88,0.88
133,FRDEEP-F,range_norm,ft,0.79,0.85,0.79,0.8,0.79
139,FRDEEP-F,range_norm,ft,0.74,0.8,0.74,0.74,0.74
587,MiraBestNOUNC,range_norm,ft,0.73,0.77,0.72,0.78,0.74


In [7]:
# Experiment 1 rows evaluated on MiraBestNOHYBRID, rounded to 2 d.p.
view1.loc[view1['data_eval'] == 'MiraBestNOHYBRID'].round(2)

Unnamed: 0,data_eval,normalisation,aggregation,accuracy,auc,f1,precision,recall
427,MiraBestNOHYBRID,range_norm,ft,0.84,0.92,0.84,0.86,0.84
433,MiraBestNOHYBRID,range_norm,ft,0.84,0.9,0.84,0.84,0.84
440,MiraBestNOHYBRID,range_norm,ft,0.69,0.72,0.67,0.74,0.69


In [8]:
# Mean metrics per normalisation for the MiraBestNOHYBRID rows of Experiment 1.
# numeric_only=True skips the remaining string columns (data_eval, aggregation);
# without it pandas >= 2.0 raises a TypeError when trying to average them.
(
    view1[view1.data_eval == 'MiraBestNOHYBRID']
    .groupby(['normalisation'])
    .mean(numeric_only=True)
    .round(2)
)

Unnamed: 0_level_0,accuracy,auc,f1,precision,recall
normalisation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
range_norm,0.79,0.84,0.79,0.81,0.79


# Experiment 2 Results:

In [18]:
# Rebuild the Experiment 2 frame (rows dated '0709Exp2') together with its derived columns.
exp2_rows = df.loc[df['date'] == '0709Exp2']
name_parts = exp2_rows['model'].str.split('-')
exp2 = exp2_rows.assign(
    # second '-'-separated field of the model name
    normalisation=name_parts.str[1],
    # third-from-last field of the model name with its first 11 characters removed
    aggregation=name_parts.str[-3].str[11:],
    # unweighted mean of the FRI and FRII value of each metric
    f1=(exp2_rows['frif1'] + exp2_rows['friif1']) / 2,
    precision=(exp2_rows['friprecision'] + exp2_rows['friiprecision']) / 2,
    recall=(exp2_rows['frirecall'] + exp2_rows['friirecall']) / 2,
)

# This view keeps the per-class metric columns; the averaged f1/precision/recall are left out.
view2_columns = [
    'data_eval',
    'normalisation',
    'aggregation',
    'accuracy',
    'auc',
    'frif1', 'friif1',
    'friprecision', 'friiprecision',
    'frirecall', 'friirecall',
]
view2 = exp2[view2_columns]

# First 20 rows ordered by aggregation, then normalisation (both descending).
view2.sort_values(by=['aggregation', 'normalisation'], ascending=False).head(20).round(2)

Unnamed: 0,data_eval,normalisation,aggregation,accuracy,auc,frif1,friif1,friprecision,friiprecision,frirecall,friirecall
575,MiraBestNOUNC,std_mean_norm,mean,0.77,0.77,0.74,0.79,0.79,0.75,0.69,0.84
134,FRDEEP-F,std_mean_norm,mean,0.74,0.73,0.67,0.78,0.75,0.73,0.6,0.84
428,MiraBestNOHYBRID,std_mean_norm,mean,0.72,0.72,0.68,0.75,0.77,0.69,0.61,0.82
281,MiraBestHYBRID,std_mean_norm,mean,0.72,0.72,0.68,0.75,0.76,0.69,0.61,0.82
584,MiraBestNOUNC,softmax,mean,0.93,0.98,0.92,0.93,0.97,0.89,0.87,0.98
290,MiraBestHYBRID,softmax,mean,0.85,0.92,0.84,0.86,0.9,0.82,0.78,0.92
437,MiraBestNOHYBRID,softmax,mean,0.85,0.92,0.83,0.86,0.9,0.82,0.78,0.92
143,FRDEEP-F,softmax,mean,0.75,0.88,0.74,0.75,0.68,0.84,0.83,0.69
570,MiraBestNOUNC,sigmoid,mean,0.92,0.92,0.91,0.93,0.97,0.88,0.85,0.98
276,MiraBestHYBRID,sigmoid,mean,0.84,0.84,0.82,0.85,0.9,0.8,0.75,0.92


In [19]:
# Experiment 2 rows evaluated on MiraBestNOHYBRID with range_norm normalisation.
# Both conditions are combined into a single mask: applying a second full-length
# mask to an already filtered frame makes pandas emit
# "Boolean Series key will be reindexed to match DataFrame index".
view2[(view2.data_eval == 'MiraBestNOHYBRID') & (view2.normalisation == 'range_norm')].round(2)

  """Entry point for launching an IPython kernel.


Unnamed: 0,data_eval,normalisation,aggregation,accuracy,auc,frif1,friif1,friprecision,friiprecision,frirecall,friirecall
422,MiraBestNOHYBRID,range_norm,deep_sup,0.84,0.91,0.82,0.85,0.86,0.82,0.8,0.87
425,MiraBestNOHYBRID,range_norm,ft,0.83,0.87,0.81,0.84,0.85,0.8,0.77,0.88
434,MiraBestNOHYBRID,range_norm,concat,0.82,0.87,0.81,0.83,0.83,0.81,0.79,0.85
436,MiraBestNOHYBRID,range_norm,mean,0.8,0.85,0.78,0.82,0.84,0.77,0.73,0.87


In [20]:
# MiraBestNOHYBRID rows of Experiment 2, ordered by aggregation then normalisation (descending).
nohybrid_rows = view2.loc[view2['data_eval'] == 'MiraBestNOHYBRID']
nohybrid_rows.sort_values(by=['aggregation', 'normalisation'], ascending=False).round(2)

Unnamed: 0,data_eval,normalisation,aggregation,accuracy,auc,frif1,friif1,friprecision,friiprecision,frirecall,friirecall
428,MiraBestNOHYBRID,std_mean_norm,mean,0.72,0.72,0.68,0.75,0.77,0.69,0.61,0.82
437,MiraBestNOHYBRID,softmax,mean,0.85,0.92,0.83,0.86,0.9,0.82,0.78,0.92
423,MiraBestNOHYBRID,sigmoid,mean,0.84,0.84,0.82,0.85,0.89,0.8,0.75,0.92
436,MiraBestNOHYBRID,range_norm,mean,0.8,0.85,0.78,0.82,0.84,0.77,0.73,0.87
435,MiraBestNOHYBRID,std_mean_norm,ft,0.66,0.67,0.65,0.68,0.66,0.67,0.64,0.69
438,MiraBestNOHYBRID,softmax,ft,0.87,0.93,0.86,0.88,0.91,0.84,0.8,0.93
430,MiraBestNOHYBRID,sigmoid,ft,0.87,0.92,0.86,0.88,0.9,0.84,0.82,0.91
425,MiraBestNOHYBRID,range_norm,ft,0.83,0.87,0.81,0.84,0.85,0.8,0.77,0.88
426,MiraBestNOHYBRID,std_mean_norm,deep_sup,0.63,0.64,0.66,0.6,0.6,0.69,0.74,0.53
421,MiraBestNOHYBRID,softmax,deep_sup,0.85,0.92,0.85,0.86,0.85,0.86,0.85,0.86


In [21]:
# Testset==Trainset Only:
# Mean metrics per aggregation for the MiraBestNOHYBRID rows of Experiment 2.
# numeric_only=True skips the remaining string columns (data_eval, normalisation);
# without it pandas >= 2.0 raises a TypeError when trying to average them.
(
    view2[view2.data_eval == 'MiraBestNOHYBRID']
    .groupby(['aggregation'])
    .mean(numeric_only=True)
    .round(2)
)

Unnamed: 0_level_0,accuracy,auc,frif1,friif1,friprecision,friiprecision,frirecall,friirecall
aggregation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
concat,0.83,0.86,0.82,0.84,0.86,0.81,0.78,0.88
deep_sup,0.78,0.82,0.79,0.78,0.77,0.8,0.81,0.75
ft,0.81,0.85,0.79,0.82,0.83,0.79,0.76,0.85
mean,0.8,0.83,0.78,0.82,0.85,0.77,0.72,0.88


In [22]:
# Mean metrics per normalisation for the MiraBestNOHYBRID rows of Experiment 2.
# numeric_only=True skips the other string columns of the view;
# without it pandas >= 2.0 raises a TypeError when trying to average them.
(
    view2[view2.data_eval == 'MiraBestNOHYBRID']
    .groupby(['normalisation'])
    .mean(numeric_only=True)
    .round(2)
)

Unnamed: 0_level_0,accuracy,auc,frif1,friif1,friprecision,friiprecision,frirecall,friirecall
normalisation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
range_norm,0.82,0.87,0.81,0.83,0.84,0.8,0.77,0.87
sigmoid,0.84,0.85,0.83,0.84,0.85,0.83,0.81,0.86
softmax,0.86,0.92,0.85,0.87,0.89,0.84,0.81,0.91
std_mean_norm,0.7,0.71,0.69,0.71,0.72,0.7,0.67,0.73


In [14]:
# Mean metrics per evaluation dataset across all Experiment 2 rows.
# numeric_only=True skips the string columns of the view (e.g. normalisation);
# without it pandas >= 2.0 raises a TypeError when trying to average them.
view2.groupby(['data_eval']).mean(numeric_only=True).round(2)

Unnamed: 0_level_0,accuracy,auc,f1,precision,recall
data_eval,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FRDEEP-F,0.77,0.82,0.77,0.77,0.77
MiraBestHYBRID,0.81,0.84,0.8,0.81,0.8
MiraBestNOHYBRID,0.81,0.84,0.8,0.81,0.8
MiraBestNOUNC,0.87,0.89,0.86,0.87,0.86


In [15]:
# Experiment 2 rows evaluated on FRDEEP-F, rounded to 2 d.p.
view2.loc[view2['data_eval'].eq('FRDEEP-F')].round(2)

Unnamed: 0,data_eval,normalisation,aggregation,accuracy,auc,f1,precision,recall
127,FRDEEP-F,softmax,deep_sup,0.84,0.93,0.83,0.83,0.84
129,FRDEEP-F,sigmoid,mean,0.82,0.83,0.82,0.83,0.81
135,FRDEEP-F,softmax,concat,0.8,0.87,0.8,0.8,0.8
130,FRDEEP-F,sigmoid,deep_sup,0.8,0.8,0.79,0.79,0.79
145,FRDEEP-F,std_mean_norm,concat,0.79,0.78,0.78,0.8,0.77
140,FRDEEP-F,range_norm,concat,0.78,0.81,0.77,0.77,0.77
138,FRDEEP-F,sigmoid,concat,0.77,0.77,0.77,0.77,0.77
144,FRDEEP-F,softmax,ft,0.77,0.86,0.77,0.77,0.77
128,FRDEEP-F,range_norm,deep_sup,0.76,0.83,0.76,0.76,0.76
136,FRDEEP-F,sigmoid,ft,0.75,0.81,0.75,0.75,0.75


In [16]:
# Experiment 2 rows evaluated on MiraBestNOUNC, rounded to 2 d.p.
view2.loc[view2['data_eval'].eq('MiraBestNOUNC')].round(2)

Unnamed: 0,data_eval,normalisation,aggregation,accuracy,auc,f1,precision,recall
576,MiraBestNOUNC,softmax,concat,0.94,0.99,0.94,0.94,0.94
577,MiraBestNOUNC,sigmoid,ft,0.94,0.98,0.94,0.94,0.93
585,MiraBestNOUNC,softmax,ft,0.93,0.98,0.92,0.93,0.92
584,MiraBestNOUNC,softmax,mean,0.93,0.98,0.92,0.93,0.92
568,MiraBestNOUNC,softmax,deep_sup,0.92,0.98,0.92,0.92,0.92
569,MiraBestNOUNC,range_norm,deep_sup,0.92,0.98,0.92,0.92,0.92
570,MiraBestNOUNC,sigmoid,mean,0.92,0.92,0.92,0.93,0.91
579,MiraBestNOUNC,sigmoid,concat,0.9,0.9,0.9,0.9,0.9
572,MiraBestNOUNC,range_norm,ft,0.88,0.93,0.88,0.89,0.88
581,MiraBestNOUNC,range_norm,concat,0.88,0.93,0.88,0.88,0.88
