# Setup Variables

Possible values for `DATASET`: MNIST, FashionMNIST, GTSRB, Cifar10

In [12]:
# Seed handed to NumPy's random number generator.
SEED = 42

# Index of the CUDA device and the device string derived from it.
CUDA = 0
GPU_NAME = f'cuda:{CUDA}'

# Dataset name and run tag; joined, they prefix the experiment's CSV file names.
DATASET = 'FashionMNIST'
POSTFIX = 'Adam-32-30'
FILENAME_POSTFIX = f'{DATASET}_{POSTFIX}'

In [2]:
import os
from pathlib import Path

# Make the repository root the working directory.
# NOTE(review): presumably needed so that the `utilities` package imports and
# the project-relative paths resolve — confirm.
PROJECT_DIR_NAME = 'runtime-monitoring'

base = Path.cwd()

if base.name != PROJECT_DIR_NAME:
    # Prefer the nearest ancestor carrying the project name, so the notebook
    # also works when it lives more than one level below the root.
    project_root = next(
        (parent for parent in base.parents if parent.name == PROJECT_DIR_NAME),
        # Fallback (previous behaviour): step up exactly one directory, e.g.
        # when the repository was cloned under a different folder name.
        base.parent,
    )
    os.chdir(project_root)
    base = Path.cwd()

# Display the resolved root as the cell output.
base

PosixPath('/home/ah19/runtime-monitoring')

# Libraries

In [3]:
import pandas as pd
import numpy as np
from pathlib import Path

from utilities.utils import load_json, load_pickle
from utilities.pathManager import fetchPaths
from utilities.MonitorUnifiedBDD import MonitorBDD

# Paths

In [17]:
# Look up the directories belonging to this dataset / run combination.
# NOTE(review): the meaning of the positional `False` flag is not visible in
# this notebook — confirm against fetchPaths.
paths = fetchPaths(base, DATASET, POSTFIX, False)

# Unpack the individual locations used by the following cells.
path_data, path_lhl, path_lhl_raw, path_lhl_pca = (
    paths[key] for key in ('data', 'lhl', 'lhl_raw', 'lhl_pca')
)

# configs = load_json(paths['configuration'])
# config = configs['configuration']
# model_setup = configs['model_setup']
# model_config = configs['model_config']
# optim_name = list(config['optimizer'].keys())[0]
# optim_args = config['optimizer'][optim_name]
# scheduler_name = list(config['scheduler'].keys())[0]
# scheduler_args = config['scheduler'][scheduler_name]

# Seed

In [11]:
# Seed NumPy's global random state; the index shuffle further down uses
# np.random.shuffle, which draws from this global state.
np.random.seed(SEED)

# Import Data

In [95]:
# Data variant to load: 'raw' reads from the raw directory, any other value
# reads from the PCA directory.
FLAVOR = 'raw'
path_lhl = path_lhl_raw if FLAVOR == 'raw' else path_lhl_pca

# Training set.
train_csv = path_lhl / f"{FILENAME_POSTFIX}_{FLAVOR}_train.csv"
df_train = pd.read_csv(train_csv)

# Keep only the correctly classified samples (rows whose "true" column equals
# True), then drop that flag column and renumber the rows from zero.
correct_mask = df_train["true"] == True
df_true = df_train[correct_mask].drop(columns="true").reset_index(drop=True)

# Test set.
test_csv = path_lhl / f"{FILENAME_POSTFIX}_{FLAVOR}_test.csv"
df_test = pd.read_csv(test_csv)

# Shuffle and Split Indices

In [96]:
# Number of fragments the shuffled row labels are cut into.
periods = 5

# Shuffle a private copy of df_true's row labels in place; the draw comes from
# NumPy's global random state.
schuffled_index = df_true.index.to_numpy(copy=True)
np.random.shuffle(schuffled_index)

# Cut the shuffled labels into `periods` consecutive, nearly equal pieces.
fragments_index = np.array_split(schuffled_index, periods)

# Sanity check shown as cell output: both shapes should be identical.
(schuffled_index.shape, np.concatenate(fragments_index).shape)

((57143,), (57143,))

# Build BDD

In [97]:
# Build one monitor per period: period k is built from the first k shuffled
# fragments of df_true and then scored on the full train and test frames.
thld_p = 0.9   # quantile level used to derive the threshold
eta = 0        # passed unchanged to add_dataframe and score_dataframe_multi_eta
neurons = []   # forwarded to MonitorBDD via its `neurons` keyword

# Number of columns in df_true besides 'y'.
n_features = df_true.shape[1] - 1

# The threshold is the thld_p-quantile over every value in df_true except the
# 'y' column. It does not depend on the fragment, so compute it once instead
# of once per iteration.
# NOTE(review): the previous comment said "each set of data has its own
# threshold", but the quantile has always been taken over the whole of
# df_true — confirm which of the two is intended.
thld = np.quantile(df_true.drop('y', axis=1), thld_p)

# Collect the per-period frames in lists and concatenate once after the loop;
# growing a DataFrame with pd.concat inside the loop is quadratic and depends
# on how pandas treats the initial empty frame.
stats_parts = []
train_parts = []
test_parts = []

# build for each fragment
for fragment in range(1, len(fragments_index) + 1):
    # indices: all row labels of the first `fragment` fragments
    ix = np.concatenate(fragments_index[:fragment])
    num_observations = ix.shape[0]

    # BDD: a fresh monitor per period; copies of the evaluation frames are
    # handed over so df_train / df_test themselves stay untouched.
    patterns = MonitorBDD(n_features, thld, neurons=neurons)
    df_train_copy, df_test_copy = patterns.add_dataframe(
        df_true.loc[ix], eta, eval_dfs=[df_train.copy(), df_test.copy()]
    )

    # score
    df_train_scores = patterns.score_dataframe_multi_eta(df_train_copy, eta)
    df_test_scores = patterns.score_dataframe_multi_eta(df_test_copy, eta)

    # add metadata
    patterns.stats['thld'] = f'qth_{thld_p}'
    patterns.stats['period'] = fragment
    patterns.stats['num_observations'] = num_observations
    df_train_scores['period'] = fragment
    df_train_scores['num_observations'] = num_observations
    df_test_scores['period'] = fragment
    df_test_scores['num_observations'] = num_observations

    # collect scores
    stats_parts.append(patterns.stats)
    train_parts.append(df_train_scores)
    test_parts.append(df_test_scores)

# save scores and stats (an empty frame when there were no fragments at all)
stats = pd.concat(stats_parts) if stats_parts else pd.DataFrame()
train_scores = pd.concat(train_parts) if train_parts else pd.DataFrame()
test_scores = pd.concat(test_parts) if test_parts else pd.DataFrame()

  bdd_stats = self.bdd.statistics()
  bdd_stats = self.bdd.statistics()
  bdd_stats = self.bdd.statistics()
  bdd_stats = self.bdd.statistics()
  bdd_stats = self.bdd.statistics()


In [98]:
# Statistics gathered from `patterns.stats`, one row per period.
stats

Unnamed: 0,thld,eta,build_time,size_mb,reorder_time,num_reorder,num_neurons,start_time,end_time,period,num_observations
1,qth_0.9,0.0,0.006,4304.498,0.0,0.0,30.0,2023-06-01 14:22:29,2023-06-01 14:22:29,1,11429
1,qth_0.9,0.0,0.013,4304.498,0.0,0.0,30.0,2023-06-01 14:22:34,2023-06-01 14:22:34,2,22858
1,qth_0.9,0.0,0.018,4304.498,0.0,0.0,30.0,2023-06-01 14:22:39,2023-06-01 14:22:40,3,34287
1,qth_0.9,0.0,0.023,4304.635,0.0,0.0,30.0,2023-06-01 14:22:44,2023-06-01 14:22:46,4,45715
1,qth_0.9,0.0,0.03,4304.637,0.0,0.0,30.0,2023-06-01 14:22:50,2023-06-01 14:22:52,5,57143


In [99]:
# Train-set scores restricted to the rows whose 'y' value is 'all'.
train_scores[train_scores['y'].eq('all')]

Unnamed: 0,y,count,false,false_misclassified,false_classified,outOfPattern,outOfPatternMisclassified,outOfPatternClassified,eta,period,num_observations
10,all,60000.0,1745.0,48.0,1697.0,0.029083,0.027507,0.972493,0,1,11429
10,all,60000.0,850.0,33.0,817.0,0.014167,0.038824,0.961176,0,2,22858
10,all,60000.0,413.0,29.0,384.0,0.006883,0.070218,0.929782,0,3,34287
10,all,60000.0,182.0,25.0,157.0,0.003033,0.137363,0.862637,0,4,45715
10,all,60000.0,19.0,19.0,0.0,0.000317,1.0,0.0,0,5,57143


In [100]:
# Test-set scores restricted to the rows whose 'y' value is 'all'.
test_scores[test_scores['y'].eq('all')]

Unnamed: 0,y,count,false,false_misclassified,false_classified,outOfPattern,outOfPatternMisclassified,outOfPatternClassified,eta,period,num_observations
10,all,10000.0,333.0,14.0,319.0,0.0333,0.042042,0.957958,0,1,11429
10,all,10000.0,214.0,10.0,204.0,0.0214,0.046729,0.953271,0,2,22858
10,all,10000.0,148.0,4.0,144.0,0.0148,0.027027,0.972973,0,3,34287
10,all,10000.0,118.0,4.0,114.0,0.0118,0.033898,0.966102,0,4,45715
10,all,10000.0,91.0,1.0,90.0,0.0091,0.010989,0.989011,0,5,57143
