# Setup Variables and home directory

MNIST, FashionMNIST, GTSRB, Cifar10

In [1]:
DATASET = 'MNIST'
SEED = 42
CUDA = 0
GPU_NAME = f'cuda:{CUDA}'

In [2]:
# set homw directory
import os
from pathlib import Path

base = Path().cwd()

if base.name != 'runtime-monitoring':
    os.chdir('../')
    base = Path().cwd()

base

PosixPath('/home/ah19/runtime-monitoring')

In [3]:
# disable warnings
import warnings
warnings.filterwarnings('ignore')

# Libraries

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from utilities.pathManager import fetchPaths
from utilities.utils import load_json

# Paths

In [5]:
paths = fetchPaths(base, DATASET, '', False)

path_bdd = paths['bdd'].parent
config = load_json(paths['configuration'])

flavors = config['configuration']['flavors']
batch_size = config['model_config']['batch_size']
lhl_neurons = config['configuration']['lhl_neurons']
optim = list(config['configuration']['optimizer'].keys())[0]

path_bdd

PosixPath('/home/ah19/runtime-monitoring/experiments/MNIST/bdd')

# Seed

In [6]:
np.random.seed(SEED)

# Import Data

In [7]:
model_names = []
for lhl in lhl_neurons:
    for flavor in flavors:
        model_names.append(f'{DATASET}_{optim}-{batch_size}-{lhl}/{flavor}')

In [9]:
model_names

['MNIST_SGD-32-5/raw',
 'MNIST_SGD-32-5/scaler_pca',
 'MNIST_SGD-32-30/raw',
 'MNIST_SGD-32-30/scaler_pca',
 'MNIST_SGD-32-60/raw',
 'MNIST_SGD-32-60/scaler_pca']

In [89]:
temp_path = path_bdd / model_names[0]
temp_path

PosixPath('/home/ah19/runtime-monitoring/experiments/MNIST/bdd/MNIST_SGD-32-5/raw')

In [90]:
for score in temp_path.glob('all*scores*.csv'):
    print(score)

/home/ah19/runtime-monitoring/experiments/MNIST/bdd/MNIST_SGD-32-5/raw/all-thlds-scores-4-raw_gte_mean_MNIST_SGD-32-5.csv
/home/ah19/runtime-monitoring/experiments/MNIST/bdd/MNIST_SGD-32-5/raw/all-thlds-scores-4-raw_top_third_MNIST_SGD-32-5.csv
/home/ah19/runtime-monitoring/experiments/MNIST/bdd/MNIST_SGD-32-5/raw/all-thlds-scores-4-raw_MNIST_SGD-32-5.csv


In [10]:
def read_df(path, f, lhl, bs):
    d = pd.read_csv(path)
    d['flavor'] = f
    d['lhl'] = lhl
    d['batch_size'] = bs
    if path.name.find('top_third') != -1:
        d['subset_type'] = 'top third'
    elif path.name.find('gte_mean') != -1:
        d['subset_type'] = 'gte mean'
    else:
        d['subset_type'] = 'raw'
    return d

def collect_dfs(path, key, f, lhl, bs):
    return pd.concat([read_df(p, f, lhl, bs) for p in path.glob(f'all*{key}*.csv')])

In [16]:
from itertools import product

In [18]:
df_info = pd.concat([collect_dfs(path_bdd / f'{DATASET}_{optim}-{batch_size}-{lhl}/{flavor}', 'info', flavor, lhl, batch_size ) \
for lhl, flavor in product(lhl_neurons, flavors)])

In [19]:
df_score = pd.concat([collect_dfs(path_bdd / f'{DATASET}_{optim}-{batch_size}-{lhl}/{flavor}', 'scores', flavor, lhl, batch_size ) \
for lhl, flavor in product(lhl_neurons, flavors)])

In [26]:
df_info.sample(10)

Unnamed: 0,thld,eta,build_time,size_mb,reorder_time,num_patterns,num_unique_patterns,num_reorder,num_neurons,start_time,end_time,flavor,lhl,batch_size,subset_type
3,qth_0.9,3.0,0.02,4307.472,0.0,,,0.0,31.0,2023-07-25 18:18:06,2023-07-25 18:18:07,raw,60,32,gte mean
16,qth_0.3,1.0,0.036,4306.502,0.0,,,0.0,20.0,2023-07-25 18:19:39,2023-07-25 18:19:41,raw,60,32,top third
2,qth_0.9,2.0,0.001,4304.202,0.0,,,0.0,17.0,2023-07-25 18:17:41,2023-07-25 18:17:41,raw,30,32,gte mean
9,qth_0.7,4.0,0.0,4303.393,0.0,,,0.0,1.0,2023-07-25 18:17:35,2023-07-25 18:17:35,raw,5,32,top third
8,qth_0.7,3.0,2.777,4523.947,0.0,,,0.0,60.0,2023-07-25 18:32:40,2023-07-25 18:35:27,raw,60,32,raw
1,qth_0.9,1.0,1.277,4334.542,0.0,,,0.0,60.0,2023-07-25 18:21:14,2023-07-25 18:22:31,raw,60,32,raw
21,relu,1.0,1.496,4382.009,0.0,,,0.0,30.0,2023-07-25 18:49:15,2023-07-25 18:50:44,scaler_pca,30,32,raw
9,qth_0.7,4.0,0.0,4303.665,0.0,,,0.0,10.0,2023-07-25 18:17:52,2023-07-25 18:17:52,raw,30,32,top third
5,qth_0.7,0.0,0.001,4303.393,0.0,57011.0,2.0,0.0,1.0,2023-07-25 18:17:33,2023-07-25 18:17:33,raw,5,32,top third
0,qth_0.9,0.0,0.018,4306.131,0.0,58197.0,15910.0,0.0,30.0,2023-07-25 18:49:05,2023-07-25 18:49:06,scaler_pca,30,32,raw


In [27]:
df_score.sample(10)

Unnamed: 0,y,count,false,false_misclassified,false_classified,outOfPattern,outOfPatternMisclassified,outOfPatternClassified,eta,stage,thld,flavor,lhl,batch_size,subset_type
436,4,980.0,0.0,0.0,0.0,0.0,0.0,1.0,1,test,qth_0.3,raw,30,32,top third
257,5,5421.0,74.0,74.0,0.0,0.013651,1.0,0.0,1,train,qth_0.5,raw,60,32,raw
338,2,1030.0,306.0,13.0,293.0,0.297087,0.042484,0.957516,3,test,qth_0.5,raw,60,32,gte mean
132,0,5923.0,0.0,0.0,0.0,0.0,0.0,1.0,1,train,qth_0.7,raw,5,32,gte mean
478,all,9984.0,1130.0,61.0,1069.0,0.113181,0.053982,0.946018,4,test,qth_0.3,raw,30,32,raw
272,8,5851.0,62.0,62.0,0.0,0.010596,1.0,0.0,2,train,qth_0.5,raw,60,32,gte mean
20,8,5851.0,105.0,105.0,0.0,0.017946,1.0,0.0,1,train,qth_0.9,scaler_pca,60,32,raw
137,5,5421.0,70.0,70.0,0.0,0.012913,1.0,0.0,1,train,qth_0.7,raw,60,32,raw
211,7,1027.0,1023.0,18.0,1005.0,0.996105,0.017595,0.982405,2,test,qth_0.7,scaler_pca,60,32,raw
447,3,1008.0,869.0,11.0,858.0,0.862103,0.012658,0.987342,2,test,qth_0.3,raw,60,32,raw


In [29]:
df_score.loc[(df_score['thld'] == 'relu') &
             (df_score['eta'] == 4) &
            (df_score['y'] == 'all')]

Unnamed: 0,y,count,false,false_misclassified,false_classified,outOfPattern,outOfPatternMisclassified,outOfPatternClassified,eta,stage,thld,flavor,lhl,batch_size,subset_type
538,all,60000.0,0.0,0.0,0.0,0.0,0.0,1.0,4,train,relu,raw,5,32,gte mean
598,all,9984.0,0.0,0.0,0.0,0.0,0.0,1.0,4,test,relu,raw,5,32,gte mean
538,all,60000.0,0.0,0.0,0.0,0.0,0.0,1.0,4,train,relu,raw,5,32,top third
598,all,9984.0,0.0,0.0,0.0,0.0,0.0,1.0,4,test,relu,raw,5,32,top third
538,all,60000.0,0.0,0.0,0.0,0.0,0.0,1.0,4,train,relu,raw,5,32,raw
598,all,9984.0,0.0,0.0,0.0,0.0,0.0,1.0,4,test,relu,raw,5,32,raw
538,all,60000.0,0.0,0.0,0.0,0.0,0.0,1.0,4,train,relu,scaler_pca,5,32,raw
598,all,9984.0,0.0,0.0,0.0,0.0,0.0,1.0,4,test,relu,scaler_pca,5,32,raw
538,all,60000.0,26.0,26.0,0.0,0.000433,1.0,0.0,4,train,relu,raw,30,32,gte mean
598,all,9984.0,10.0,2.0,8.0,0.001002,0.2,0.8,4,test,relu,raw,30,32,gte mean
