## Base Path

In [1]:
# set home directory
import os
from pathlib import Path

# when the kernel was not started in the folder named 'runtime-monitoring',
# step up one level before resolving anything relative to the working directory
if Path.cwd().name != 'runtime-monitoring':
    os.chdir('../')

# `base` always mirrors the (possibly updated) working directory
base = Path.cwd()

## Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import product
from ipywidgets import widgets

from utilities.pathManager import fetchPaths
from utilities.utils import load_json, get_models

In [3]:
# disable warnings
import warnings
warnings.filterwarnings('ignore')

# Setup Variables and home directory

Available datasets: MNIST, FashionMNIST, GTSRB, Cifar10

__Change the following cells to select your desired monitor!__

In [4]:
# Dataset whose model statistics and monitor results are loaded below;
# keep exactly one of the following lines uncommented.
# DATASET = 'MNIST'
DATASET = 'FashionMNIST'
# DATASET = 'GTSRB'

In [5]:
# Value matched against the 'lhl' column of the model stats and embedded in the
# results folder name (presumably the size of the last hidden layer - TODO confirm)
LHL = 60

# Thresholds: Mean, ReLU, 25%-Q, Median, 75%-Q
# (must be one of the keys of `threshold_name_file`)
THLD = 'Median'

# Flavors: Raw, GTE Mean, Top Third, PCA, Components
# (must be one of the keys of `flavor_name_file`)
FLAVOR = 'GTE Mean'

__Do not change the following cells!__

In [6]:
# rename flavor and fetch its folder [folder, flavor]
flavor_name_file = {
    'Raw': ['raw', 'None'],
    'GTE Mean': ['raw', 'gte_mean'],
    'Top Third': ['raw', 'top_third'],
    'PCA': ['pca', 'None'],
    'Components': ['pca', 'gte_mean']
}

# rename threshold based on the thesis
threshold_name_file = {
    'Mean': 'mean',
    'Median': 'qth_0.5',
    '75%-Q': 'qth_0.75',
    '25%-Q': 'qth_0.25',
    'ReLU': 'relu'
}

In [7]:
# resolve the relevant paths for the selected dataset
paths = fetchPaths(base, DATASET, '', False)

# load the stored configuration and pull out batch size and optimizer name
config = load_json(paths['configuration'])
batch_size = config['model_config']['batch_size']
# the optimizer name is the first key of the 'optimizer' mapping
optim = list(config['configuration']['optimizer'])[0]

# identifies which version of the dataset is used
FULLNAME = f'{DATASET}_{optim}-{batch_size}-{LHL}'

# folder holding all csv files of the selected flavor
path_bdd = paths['bdd'].parent.joinpath(FULLNAME, flavor_name_file[FLAVOR][0])

# Import Data

In [8]:
# load the model statistics of the selected dataset
df_model_stats = pd.read_csv(
    paths['saved_models'].parent.parent / f'{DATASET}_model_stats.csv'
)
# keep only the rows whose 'lhl' equals the configured LHL, re-indexed from 0
df_NN = df_model_stats[df_model_stats['lhl'] == LHL].reset_index(drop=True)

In [9]:
# shared tail of every csv name: <flavor>-<threshold>-<folder>_<FULLNAME>.csv
filename_postfix = f'{flavor_name_file[FLAVOR][1]}-{threshold_name_file[THLD]}-{flavor_name_file[FLAVOR][0]}_{FULLNAME}.csv'

# read the four monitor tables in one pass; the order of the paths matches
# the order of the names on the left-hand side
df_info, df_scores, df_info_full, df_scores_full = map(
    pd.read_csv,
    (
        path_bdd / f'all-thlds-info-3-{filename_postfix}',
        path_bdd / f'all-thlds-scores-3-{filename_postfix}',
        path_bdd / f'all-thlds-full-info-3-{filename_postfix}',
        path_bdd / f'all-thlds-full-scores-3-{filename_postfix}',
    ),
)

In [10]:
def get_df(df, stage):
    '''Select the final-result rows of a single stage.

    Keeps the rows whose "y" column equals "all" (the row holding the final
    results, as opposed to the individual classes) and whose "stage" column
    equals `stage`. The last 4 columns only carry repetitive data and are
    left out of the returned frame.
    '''
    final_rows = df['y'] == 'all'
    stage_rows = df['stage'] == stage
    informative_columns = df.columns[:-4]
    return df.loc[final_rows & stage_rows, informative_columns]

# NN Statistics

In [11]:
# model statistics restricted to the rows whose 'lhl' equals LHL
df_NN

Unnamed: 0,lhl,optim,scheduler,epochs,best_epoch,train_losses,test_losses,train_accs,test_accs,train_loss,test_loss,train_acc,test_acc
0,60,AdamW,MultiStepLR,16,10,"[0.7630208693166426, 0.6653183381547286, 0.642...","[0.30950055973461044, 0.29611674466958415, 0.2...","[0.7137806415557861, 0.7455642819404602, 0.754...","[0.8851162195205688, 0.8982371687889099, 0.907...",0.119463,0.229867,0.96183,0.921975


# Monitor Statistics

## Only true instances from Train Split

### Metadata

In [12]:
# contents of the 'all-thlds-info' csv for the selected flavor and threshold
df_info

Unnamed: 0,thld,eta,build_time_min,size_mb,reorder_time_min,num_patterns,num_unique_patterns_%,num_reorder,num_neurons,start_time,end_time
0,qth_0.5,0.0,0.783,4308.9,0.0,57679.0,45.8,0.0,30.0,2023-08-11 14:17:33,2023-08-11 14:18:21
1,qth_0.5,1.0,37.1,4317.7,0.0,792630.0,89.4,0.0,30.0,2023-08-11 14:21:22,2023-08-11 14:58:29
2,qth_0.5,2.0,14.167,4318.3,0.0,399412.0,96.7,0.0,30.0,2023-08-11 15:01:31,2023-08-11 15:15:42
3,qth_0.5,3.0,6.0,4318.3,0.0,201997.0,99.0,0.0,30.0,2023-08-11 15:18:16,2023-08-11 15:24:16


### Train

In [13]:
# rows with y == 'all' for the 'train' stage
get_df(df_scores, 'train')

Unnamed: 0,y,total_count,total_misclassified,unrecognized,unrecognized_and_misclassified,unrecognized_and_classified,NPR,NPV,specificity,eta
10,all,59968,2289,1490,1490,0,0.024847,1.0,0.650939,0
21,all,59968,2289,1490,1490,0,0.024847,1.0,0.650939,1
32,all,59968,2289,1490,1490,0,0.024847,1.0,0.650939,2
43,all,59968,2289,1490,1490,0,0.024847,1.0,0.650939,3


### Test

In [14]:
# rows with y == 'all' for the 'test' stage
get_df(df_scores, 'test')

Unnamed: 0,y,total_count,total_misclassified,unrecognized,unrecognized_and_misclassified,unrecognized_and_classified,NPR,NPV,specificity,eta
54,all,9984,779,3636,566,3070,0.364183,0.155666,0.726573,0
65,all,9984,779,3636,566,3070,0.364183,0.155666,0.726573,1
76,all,9984,779,3636,566,3070,0.364183,0.155666,0.726573,2
87,all,9984,779,3636,566,3070,0.364183,0.155666,0.726573,3


### Evaluation

In [15]:
# rows with y == 'all' for the 'evaluation' stage
get_df(df_scores, 'evaluation')

Unnamed: 0,y,total_count,total_misclassified,unrecognized,unrecognized_and_misclassified,unrecognized_and_classified,NPR,NPV,specificity,eta
89,all,1000,1000,950,0,950,0.95,1.0,0.0,0
91,all,1000,1000,950,0,950,0.95,1.0,0.0,1
93,all,1000,1000,950,0,950,0.95,1.0,0.0,2
95,all,1000,1000,950,0,950,0.95,1.0,0.0,3


## __All__ true instances from Train & Test Split

### Metadata (Full)

In [16]:
# contents of the 'all-thlds-full-info' csv for the selected flavor and threshold
df_info_full

Unnamed: 0,thld,eta,build_time_min,size_mb,reorder_time_min,num_patterns,num_unique_patterns_%,num_reorder,num_neurons,start_time,end_time
0,qth_0.5,0.0,1.183,4309.1,0.0,66884.0,44.0,0.0,30.0,2023-08-11 14:22:50,2023-08-11 14:24:02
1,qth_0.5,1.0,39.033,4317.9,0.0,882090.0,89.1,0.0,30.0,2023-08-11 14:27:43,2023-08-11 15:06:46
2,qth_0.5,2.0,14.117,4318.4,0.0,444450.0,96.5,0.0,30.0,2023-08-11 15:09:25,2023-08-11 15:23:33
3,qth_0.5,3.0,6.0,4318.4,0.0,224755.0,98.9,0.0,30.0,2023-08-11 15:25:49,2023-08-11 15:31:49


### Train (Full)

In [17]:
# rows with y == 'all' for the 'train' stage, taken from the 'full' scores
get_df(df_scores_full, 'train')

Unnamed: 0,y,total_count,total_misclassified,unrecognized,unrecognized_and_misclassified,unrecognized_and_classified,NPR,NPV,specificity,eta
10,all,59968,2289,1466,1466,0,0.024446,1.0,0.640454,0
21,all,59968,2289,1466,1466,0,0.024446,1.0,0.640454,1
32,all,59968,2289,1466,1466,0,0.024446,1.0,0.640454,2
43,all,59968,2289,1466,1466,0,0.024446,1.0,0.640454,3


### Test (Full)

In [18]:
# rows with y == 'all' for the 'test' stage, taken from the 'full' scores
get_df(df_scores_full, 'test')

Unnamed: 0,y,total_count,total_misclassified,unrecognized,unrecognized_and_misclassified,unrecognized_and_classified,NPR,NPV,specificity,eta
54,all,9984,779,546,546,0,0.054688,1.0,0.700899,0
65,all,9984,779,546,546,0,0.054688,1.0,0.700899,1
76,all,9984,779,546,546,0,0.054688,1.0,0.700899,2
87,all,9984,779,546,546,0,0.054688,1.0,0.700899,3


### Evaluation (Full)

In [19]:
# rows with y == 'all' for the 'evaluation' stage, taken from the 'full' scores
get_df(df_scores_full, 'evaluation')

Unnamed: 0,y,total_count,total_misclassified,unrecognized,unrecognized_and_misclassified,unrecognized_and_classified,NPR,NPV,specificity,eta
89,all,1000,1000,944,0,944,0.944,1.0,0.0,0
91,all,1000,1000,944,0,944,0.944,1.0,0.0,1
93,all,1000,1000,944,0,944,0.944,1.0,0.0,2
95,all,1000,1000,944,0,944,0.944,1.0,0.0,3
