# Setup Variables and home directory

Available choices for `DATASET`: MNIST, FashionMNIST, GTSRB, Cifar10

In [1]:
# Notebook-wide settings; edit these before running the cells below.
SEED = 42                   # seed handed to the random number generator
CUDA = 0                    # index used to build the CUDA device string
GPU_NAME = f'cuda:{CUDA}'   # device string derived from the index above
DATASET = 'MNIST'           # dataset whose saved models and data are used

In [2]:
# set home directory
# Make the project root the working directory so that the project-relative
# paths and the `utilities` imports used later resolve.
import os
from pathlib import Path

PROJECT_DIRNAME = 'runtime-monitoring'

base = Path.cwd()

if base.name != PROJECT_DIRNAME:
    # Climb to the nearest ancestor that is the project root, so the notebook
    # also works when started more than one level below it. If no ancestor
    # matches, fall back to moving up a single level (the previous behaviour).
    project_root = next(
        (parent for parent in base.parents if parent.name == PROJECT_DIRNAME),
        base.parent,
    )
    os.chdir(project_root)
    base = Path.cwd()

base

PosixPath('/home/ah19/runtime-monitoring')

In [4]:
# print available models
# Every entry in the dataset's `saved-models` directory whose name starts
# with '<DATASET>_' is a candidate; the prefix is stripped to get the model name.
model_prefix = f'{DATASET}_'
saved_models_dir = base / 'experiments' / DATASET / 'saved-models'

# The same prefix is used for the test and for the strip, and only the
# leading occurrence is removed (str.replace would remove every occurrence).
model_names = [
    entry.name[len(model_prefix):]
    for entry in saved_models_dir.iterdir()
    if entry.name.startswith(model_prefix)
]

# Order by the integer that follows the last '-' in the model name.
model_names.sort(key=lambda x: int(x.split('-')[-1]))
model_names

['SGD-32-5', 'SGD-32-30', 'SGD-32-60']

In [5]:
# Pick the first model of the sorted list and build the '<DATASET>_<model>'
# stem that prefixes the data file names read later in the notebook.
POSTFIX = model_names[0]
FILENAME_POSTFIX = f'{DATASET}_{POSTFIX}'

FILENAME_POSTFIX

'MNIST_SGD-32-5'

In [6]:
# disable warnings
import warnings
warnings.filterwarnings('ignore')

# Libraries

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from fastprogress import progress_bar

from utilities.utils import load_json, load_pickle
from utilities.pathManager import fetchPaths
from utilities.MonitorUnifiedBDD import MonitorBDD

# Paths

In [16]:
# Look up the directories belonging to this dataset/model pair.
# NOTE(review): the meaning of the trailing positional `False` is defined by
# utilities.pathManager.fetchPaths and is not visible here — confirm.
paths = fetchPaths(base, DATASET, POSTFIX, False)

# Bind the individual entries to short names.
path_data, path_lhl, path_lhl_raw, path_lhl_pca = (
    paths[key] for key in ('data', 'lhl', 'lhl_raw', 'lhl_scaler_pca')
)

# Seed

In [11]:
# Seed NumPy's global random number generator with the configured SEED.
np.random.seed(SEED)

# Import Data

In [17]:
# import Data
# NOTE(review): FLAVOR is not assigned in any cell visible in this notebook;
# unless it is defined elsewhere, this cell raises NameError on a fresh
# kernel — confirm where it is supposed to come from.
train_csv = path_lhl_raw / f"{FILENAME_POSTFIX}_{FLAVOR}_train.csv"
test_csv = path_lhl_raw / f"{FILENAME_POSTFIX}_{FLAVOR}_test.csv"

df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

# select only true classified: keep the rows whose "true" column equals True,
# then drop that column and renumber the rows from zero
is_true = df_train["true"] == True
df_true = df_train[is_true].drop(columns="true").reset_index(drop=True)

In [19]:
# Load two JSON files from the lhl directory. `top_third` is the neuron
# selection passed to the monitor below; `gte_mean` only appears there as a
# commented-out alternative.
# NOTE(review): presumably each file holds a collection of neuron indices — confirm.
gte_mean = load_json(path_lhl / "neurons_scaler_pca_gte_mean.json")
top_third = load_json(path_lhl / "neurons_scaler_pca_top_third.json")

# Build BDD

In [39]:
# each set of data has its own threshold
thld_p = 0.9   # quantile level used for the per-column thresholds
eta = 0        # forwarded to add_dataframe and to both scoring calls

# Neuron selection handed to the monitor. Alternatives tried in this cell
# were an empty list and `gte_mean`.
neurons = top_third


# calculate threshold: the thld_p-quantile of every column except 'y',
# taken over the correctly classified training rows
df_true_wo_y = df_true.drop(columns='y')
thld = np.quantile(df_true_wo_y, thld_p, axis=0)


# BDD
num_columns = df_true.shape[1] - 1   # every column of df_true except 'y'
patterns = MonitorBDD(num_columns, thld, neurons=neurons)
df_train_copy, df_test_copy = patterns.add_dataframe(
    df_true,
    eta,
    eval_dfs=[df_train.copy(), df_test.copy()],
)


# score
df_train_scores = patterns.score_dataframe_multi_eta(df_train_copy, eta)
df_test_scores = patterns.score_dataframe_multi_eta(df_test_copy, eta)


# add metadata
patterns.stats['thld'] = f'qth_{thld_p}'
patterns.stats['num_observations'] = df_true.shape[0]

In [40]:
# Display the monitor's statistics, including the 'thld' and
# 'num_observations' entries set in the previous cell.
patterns.stats

Unnamed: 0,thld,eta,build_time,size_mb,reorder_time,num_patterns,num_unique_patterns,num_reorder,num_neurons,start_time,end_time,num_observations
1,qth_0.9,0.0,0.001,4303.393,0.0,57011.0,11.0,0.0,5.0,2023-07-25 17:39:26,2023-07-25 17:39:26,57011


In [41]:
# Show only the rows of the training scores whose 'y' value is 'all'.
df_train_scores.loc[ df_train_scores['y'] == 'all' ]

Unnamed: 0,y,count,false,false_misclassified,false_classified,outOfPattern,outOfPatternMisclassified,outOfPatternClassified,eta
10,all,60000.0,0.0,0.0,0.0,0.0,0.0,1.0,0


In [42]:
# Show only the rows of the test scores whose 'y' value is 'all'.
df_test_scores.loc[ df_test_scores['y'] == 'all' ]

Unnamed: 0,y,count,false,false_misclassified,false_classified,outOfPattern,outOfPatternMisclassified,outOfPatternClassified,eta
10,all,9984.0,0.0,0.0,0.0,0.0,0.0,1.0,0
