In [146]:
from dash import Dash, html, dcc, callback, Output, Input, dash_table
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import pickle
import re
import numpy as np
import pandas as pd

def _read_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it.

    ``pickle.load`` can execute arbitrary code while deserializing, so this
    should only ever be pointed at files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Each file is unpickled into its own module-level name, in this order.
df = _read_pickle('evaluation_full.pkl')
df_roc = _read_pickle('evaluation_roc.pkl')
df_time = _read_pickle('evaluation_time_full.pkl')

In [148]:
# Groups with five members each: one method name per text representation
# (Tf-Idf, TinyBert, Bert, Distilbert, RoBERTa), always listed in that order.
_MULTI_MEMBER_GROUPS = {
    'RF': [
        'Tf-Idf Embedding Random Forest',
        'TinyBert Embedding Random Forest',
        'Bert Embedding Random Forest',
        'Distilbert Embedding Random Forest',
        'RoBERTa Embedding Random Forest',
    ],
    'SVM': [
        'Tf-Idf Embedding SVM',
        'TinyBert Embedding SVM',
        'Bert Embedding SVM',
        'Distilbert Embedding SVM',
        'RoBERTa Embedding SVM',
    ],
    'MLP': [
        'Tf-Idf Embedding Simple NN',
        'TinyBert Embedding Simple NN',
        'Bert Embedding Simple NN',
        'Distilbert Embedding Simple NN',
        'RoBERTa Embedding Simple NN',
    ],
    'LR': [
        'Tf-Idf Embedding Logistic Regression',
        'TinyBert Embedding Logistic Regression',
        'Bert Embedding Logistic Regression',
        'Distilbert Embedding Logistic Regression',
        'RoBERTa Embedding Logistic Regression',
    ],
    'IF': [
        'Tf-Idf Outlier generic',
        'TinyBert Outlier generic',
        'Bert Outlier generic',
        'Distilbert Outlier generic',
        'RoBERTa Outlier generic',
    ],
}

# Groups that contain exactly one method name.
_SINGLE_MEMBER_GROUPS = {
    'Bert': ['Bert Rejector generic'],
    'TBert': ['TinyBert Rejector generic'],
    'RBert': ['RoBERTa Rejector generic'],
    'DBert': ['Distilbert Rejector generic'],
    'PE-MLP': ['Base Embedding Simple NN'],
    'PE-LR': ['Base Embedding Logistic Regression'],
    'PE-RF': ['Base Embedding Random Forest'],
    'PE-SVM': ['Base Embedding SVM'],
    'PE-IF': ['Base Embedding Outlier generic'],
}

# Short group label -> exact method names that belong to it.
# Insertion order: the multi-member groups first, then the single-member ones.
groups = {**_MULTI_MEMBER_GROUPS, **_SINGLE_MEMBER_GROUPS}

def classify(method):
    """Return the group label for the method name ``method``.

    Resolution order:
      1. the first key of ``groups`` whose list contains ``method`` exactly;
      2. otherwise a label chosen by the prefix of ``method``
         ('Dropout Agreement MV', 'Dropout Agreement DP', or 'Softmax' -> 'SR');
      3. otherwise the string 'other'.
    """
    # Exact membership wins over any prefix rule.
    exact_label = next(
        (label for label, members in groups.items() if method in members),
        None,
    )
    if exact_label is not None:
        return exact_label

    # (prefix, label) pairs, checked in order; the first matching prefix decides.
    prefix_rules = (
        ('Dropout Agreement MV', 'Dropout Agreement MV'),
        ('Dropout Agreement DP', 'Dropout Agreement DP'),
        ('Softmax', 'SR'),
    )
    for prefix, label in prefix_rules:
        if method.startswith(prefix):
            return label

    return 'other'

In [149]:
# Tag every timing row with the group label of its method name.
df_time['Group'] = df_time['Method_Classifier'].apply(classify)

# One row per dataset holding the largest Data_size seen for that dataset.
max_data_size = (
    df_time
    .groupby('Dataset')['Data_size']
    .max()
    .reset_index()
    .rename(columns={'Data_size': 'Max_Data_size'})
)

# Join the timing rows to their dataset's maximum on (Dataset, Data_size), then
# keep the rows whose Data_size equals that maximum.
res = (
    df_time
    .merge(
        max_data_size,
        left_on=['Dataset', 'Data_size'],
        right_on=['Dataset', 'Max_Data_size'],
    )
    .loc[lambda merged: merged['Data_size'] == merged['Max_Data_size']]
)

In [156]:
# Narrow `res` in two steps: first to three datasets, then to four methods.
temp = (
    res
    .loc[lambda frame: frame['Dataset'].isin([
        'AG_News Interpolated',
        'AG_News_PHI Interpolated',
        'Base_Phi_TransformationSpellcheck Interpolated',
    ])]
    .loc[lambda frame: frame['Method_Classifier'].isin([
        'Softmax generic',
        'Dropout Agreement DP generic',
        'Distilbert Rejector generic',
        'Base Embedding Simple NN',
    ])]
)

In [158]:
# Bare expression: the notebook renders the filtered frame as this cell's output.
temp

Unnamed: 0,Method_Classifier,Dataset,Data_size,Score_50,Score_90,Score_95,Method,Classifier,Score_50_norm,Score_90_norm,Score_95_norm,Initial Time,Inference Time,Initial Info,Inference Info,Group,Max_Data_size
27,Dropout Agreement DP generic,Base_Phi_TransformationSpellcheck Interpolated,10000,-0.120498,-0.026292,-0.013379,Dropout Agreement DP,generic,-1.0,-1.0,-1.0,0.0,11085.875227,"{'MatchVectorTime': 0, 'fit_time': 0}","{'Model Inference': 0, 'test_time': 11085.8752...",Dropout Agreement DP,10000
41,Dropout Agreement DP generic,AG_News_PHI Interpolated,10000,0.010737,0.001478,0.000412,Dropout Agreement DP,generic,0.577401,0.909806,0.850006,0.0,742.103492,"{'MatchVectorTime': 0, 'fit_time': 0}","{'Model Inference': 0, 'test_time': 742.103491...",Dropout Agreement DP,10000
46,Dropout Agreement DP generic,AG_News Interpolated,10000,0.008097,0.001355,0.000512,Dropout Agreement DP,generic,0.371678,0.759239,0.964589,0.0,60.343096,"{'MatchVectorTime': 0, 'fit_time': 0}","{'Model Inference': 0, 'test_time': 60.3430955...",Dropout Agreement DP,10000
621,Distilbert Rejector generic,Base_Phi_TransformationSpellcheck Interpolated,10000,0.07115,0.002636,0.000637,Distilbert Rejector,generic,0.590467,0.100245,0.047588,5603.518692,1.7939,"{'MatchVectorTime': 5572.813392079435, 'fit_ti...",{'test_time': 1.7939},DBert,10000
635,Distilbert Rejector generic,AG_News_PHI Interpolated,10000,0.010726,0.0006,0.000174,Distilbert Rejector,generic,0.57681,0.369509,0.359554,382.50071,1.8433,"{'MatchVectorTime': 351.28901040460914, 'fit_t...",{'test_time': 1.8433},DBert,10000
640,Distilbert Rejector generic,AG_News Interpolated,10000,0.013756,0.000759,0.00021,Distilbert Rejector,generic,0.631449,0.425504,0.395847,64.589297,1.7882,"{'MatchVectorTime': 30.19459716277197, 'fit_ti...",{'test_time': 1.7882},DBert,10000
911,Base Embedding Simple NN,AG_News_PHI Interpolated,10000,0.015573,0.000865,0.000208,Base Embedding,Simple NN,0.837408,0.532546,0.429156,389.251269,70.447215,"{'MatchVectorTime': 351.28901040460914, 'fit_t...","{'Model Inference': 70.42913684621453, 'test_t...",PE-MLP,10000
916,Base Embedding Simple NN,AG_News Interpolated,10000,0.021355,0.001761,0.000468,Base Embedding,Simple NN,0.980253,0.986829,0.882261,36.165562,30.197435,"{'MatchVectorTime': 30.19459716277197, 'fit_ti...","{'Model Inference': 30.19459716277197, 'test_t...",PE-MLP,10000
936,Softmax generic,AG_News_PHI Interpolated,10000,0.018335,0.001625,0.000448,Softmax,generic,0.985934,1.0,0.925043,0.0,71.215301,"{'MatchVectorTime': 0, 'fit_time': 0}","{'Model Inference': 70.42913684621453, 'test_t...",SR,10000
941,Softmax generic,AG_News Interpolated,10000,0.021136,0.001616,0.000478,Softmax,generic,0.9702,0.905767,0.901073,0.0,30.194687,"{'MatchVectorTime': 0, 'fit_time': 0}","{'Model Inference': 30.19459716277197, 'test_t...",SR,10000
