# Setup Variables

MNIST, FashionMNIST, GTSRB, Cifar10

In [1]:
DATASET = 'GTSRB'
POSTFIX = 'Adam-32-50'
FILENAME_POSTFIX = f'{DATASET}_{POSTFIX}'
SEED = 42
CUDA = 0
GPU_NAME = f'cuda:{CUDA}'

In [2]:
import os
from pathlib import Path

base = Path().cwd()

if base.name != 'runtime-monitoring':
    os.chdir('../')
    base = Path().cwd()

base

PosixPath('/home/ah19/runtime-monitoring')

In [3]:
# disable warnings
import warnings
warnings.filterwarnings('ignore')

# Libraries

In [4]:
import pandas as pd
import numpy as np
from pathlib import Path

from fastprogress import progress_bar

from utilities.utils import load_json, load_pickle
from utilities.pathManager import fetchPaths
from utilities.MonitorUnifiedBDD import MonitorBDD

# Paths

In [5]:
paths = fetchPaths(base, DATASET, POSTFIX, False)

path_data = paths['data']
path_lhl = paths['lhl']
path_lhl_raw = paths['lhl_raw']
path_lhl_pca = paths['lhl_pca']

# configs = load_json(paths['configuration'])
# config = configs['configuration']
# model_setup = configs['model_setup']
# model_config = configs['model_config']
# optim_name = list(config['optimizer'].keys())[0]
# optim_args = config['optimizer'][optim_name]
# scheduler_name = list(config['scheduler'].keys())[0]
# scheduler_args = config['scheduler'][scheduler_name]

# Seed

In [6]:
np.random.seed(SEED)

# Import Data

In [7]:
FLAVOR = 'raw'

if FLAVOR == 'raw':
    path_lhl = path_lhl_raw
else: path_lhl = path_lhl_pca

# import Data
df_train = pd.read_csv(path_lhl / f"{FILENAME_POSTFIX}_{FLAVOR}_train.csv")

# select only true classified
df_true = df_train[df_train["true"] == True].copy()
df_true = df_true.drop("true", axis=1).reset_index(drop=True)

df_test = pd.read_csv(path_lhl / f"{FILENAME_POSTFIX}_{FLAVOR}_test.csv")

# Shuffel and Split indecies

In [8]:
periods = 5

# shuffle data
schuffled_index = df_true.index.to_numpy().copy()
np.random.shuffle(schuffled_index)

# split data into batches
fragments_index = np.array_split(schuffled_index, periods)

# print shape
schuffled_index.shape, np.concatenate(fragments_index).shape

((77345,), (77345,))

# Build BDD

In [9]:
# each set of data has its own threshold
thld_p = 0.9
eta = 0
neurons = []


# save scores and stats
stats = pd.DataFrame()
train_scores = pd.DataFrame()
test_scores = pd.DataFrame()

# progress bar
pb = progress_bar(range(len(fragments_index)))

# buidl for each fragment
for fragment in pb:
    # indecies
    ix = np.concatenate(fragments_index[0:fragment+1])

    # calculate threshold
    thld = np.quantile( df_true.loc[ix].drop('y', axis=1), thld_p)
    
    # BDD
    patterns = MonitorBDD( df_true.loc[ix].shape[1]-1, thld, neurons=neurons)
    df_train_copy, df_test_copy = patterns.add_dataframe( df_true.loc[ix], eta, eval_dfs=[df_train.loc[ix].copy(), df_test.copy()] )

    
    # score
    df_train_scores = patterns.score_dataframe_multi_eta(df_train_copy, eta)
    df_test_scores = patterns.score_dataframe_multi_eta(df_test_copy, eta)

    
    # add metadata
    patterns.stats['thld'] = f'qth_{thld_p}'
    patterns.stats['period'] = fragment
    patterns.stats['num_observations'] = ix.shape[0]
    df_train_scores['period'] = fragment
    df_train_scores['num_observations'] = ix.shape[0]
    df_test_scores['period'] = fragment
    df_test_scores['num_observations'] = ix.shape[0]
    
    
    # collect scores
    stats = pd.concat([stats, patterns.stats])
    train_scores = pd.concat([train_scores, df_train_scores])
    test_scores = pd.concat([test_scores, df_test_scores])

df (15469, 52) <class 'int'>


ValueError: Length of values (13859) does not match length of index (15469)

In [None]:
stats

In [None]:
train_scores.loc[ train_scores['y'] == 'all' ]

In [None]:
test_scores.loc[ test_scores['y'] == 'all' ]