In [59]:
# https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#

import os
from pathlib import Path

import pandas as pd
import numpy as np

import optuna
from optuna.trial import TrialState
from optuna.samplers import CmaEsSampler, RandomSampler, TPESampler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

In [60]:
# Repository root. The original location stays the default, but it can be
# overridden per machine via the RUNTIME_MONITORING_REPO environment variable
# instead of editing the notebook.
REPO_PATH = os.environ.get('RUNTIME_MONITORING_REPO', '/home/ah19/runtime-monitoring')
DATASET = 'MNIST'
# Run identifier; it is embedded in the input directory/CSV names and in the
# name of the results file.
PREFIX = 'MNIST32-L2-Adam-100'
# Only used when naming the exported results CSV.
NUM_NEURONS = 100
# Stem shared by the per-run directory and its train/test CSV files.
FILENAME_POSTFIX = f"{DATASET}_{PREFIX}"
# Seed handed to the Optuna sampler.
SEED = 42

In [61]:
import sys

# Make the project's helper modules importable. Entries that are already on
# sys.path are skipped so that re-running this cell does not add duplicates.
for extra_dir in (REPO_PATH + '/utilities', f'{REPO_PATH}/{DATASET}/trainingModels'):
    if extra_dir not in sys.path:
        sys.path.append(extra_dir)

In [62]:
from utils import *
from plotFunctions import *
from MonitorUnifiedBDD import build_bdd
from pathManager import fetchPaths

In [63]:
# Resolve the project's directory layout for the selected dataset.
base = Path(REPO_PATH)
paths = fetchPaths(base, DATASET)

path = paths[DATASET.lower()]
path_dataset = paths['dataset']
path_lastHiddenLayer = paths['lastHiddenLayer']
path_lastHiddenLayer_raw = paths['lastHiddenLayer_raw'] / FILENAME_POSTFIX
# NOTE(review): 'Sinlge' looks like a typo for 'Single'. It is kept as-is
# because it is a runtime path -- confirm the on-disk directory name first.
path_lastHiddenLayer_pca = paths['lastHiddenLayer_pca'] / FILENAME_POSTFIX / 'Sinlge'

# Study results go to a dedicated sub-directory. parents=True also creates any
# missing intermediate directory instead of raising FileNotFoundError.
path_trainingModels = paths['trainingModels'] / 'optuna-last-hidden-layer'
path_trainingModels.mkdir(parents=True, exist_ok=True)

path_lastHiddenLayer_raw

PosixPath('/home/ah19/runtime-monitoring/MNIST/lastHiddenLayer/raw/MNIST_MNIST32-L2-Adam-100')

In [64]:
# Frames already read from disk, keyed by file path. Re-running this cell
# resets the cache, so changed files are picked up again.
_FRAME_CACHE = {}


def _read_csv_cached(csv_path):
    """Return the DataFrame stored at ``csv_path``, reading the file only once."""
    key = str(csv_path)
    if key not in _FRAME_CACHE:
        _FRAME_CACHE[key] = pd.read_csv(csv_path)
    return _FRAME_CACHE[key]


def objective(trial):
    """Optuna objective: score one per-column quantile threshold.

    A quantile in [0, 0.95] (step 0.01) is suggested under the parameter name
    "quantile". It is turned into one threshold per column, computed over the
    training rows whose 'true' column equals True (with the 'y' column
    excluded), and handed to ``build_bdd`` together with the train and test
    frames.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the quantile.

    Returns
    -------
    float
        The 'outOfPatternMissClassified' entry of the score table returned by
        ``build_bdd`` for the row with y == 'all' and stage == 'test'.

    Raises
    ------
    IndexError
        If the score table has no row with y == 'all' and stage == 'test'.
    """
    # The CSVs do not depend on the trial, so they are read once and shared;
    # build_bdd only ever receives copies, keeping the cached frames intact.
    df = _read_csv_cached(path_lastHiddenLayer_raw / f'{FILENAME_POSTFIX}_train.csv')
    df_test = _read_csv_cached(path_lastHiddenLayer_raw / f'{FILENAME_POSTFIX}_test.csv')

    # Keep only the training rows flagged in the 'true' column, without the flag itself.
    df_true = df[df['true'] == True].drop('true', axis=1).reset_index(drop=True)

    # One threshold per remaining column at the sampled quantile.
    qnt_float = trial.suggest_float("quantile", 0, 0.95, step=0.01)
    thld = np.quantile(df_true.drop('y', axis=1), qnt_float, axis=0)

    # NOTE(review): the meaning of the None / 0 / None slots of the argument
    # tuple is defined by build_bdd, which is not visible here -- confirm there.
    _, res_scores = build_bdd((df.copy(), df_test.copy(), df_true.copy(), None,
                               f'qt{round(qnt_float, 2)}', thld, 0, None))

    overall_test_row = (res_scores['y'] == 'all') & (res_scores['stage'] == 'test')
    result = float(res_scores.loc[overall_test_row, 'outOfPatternMissClassified'].values[0])

    # No trial.should_prune() check here: no intermediate value is ever passed
    # to trial.report(), so a pruner has nothing to act on, and the work is
    # already finished by the time the score is known.
    return result


In [65]:
# Wall-clock budget for the study in seconds (1.5 hours), and the maximum
# number of trials; the study stops at whichever limit is reached first.
total_trials = 100
total_time = int(1.5 * 60 * 60)

In [None]:
# Search space: the single "quantile" parameter sampled inside `objective`.
# NOTE: the sampler is seeded, but with n_jobs > 1 trials run in parallel and
# finish in a non-deterministic order, so repeated runs can still differ.
sampler = TPESampler(seed=SEED) # Default
# sampler = CmaEsSampler(seed=SEED)
# sampler = RandomSampler(seed=SEED)

study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=total_trials, timeout=total_time, n_jobs=20)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# study.best_trial raises when no trial has completed (e.g. the timeout hit
# first), so only report it when there is something to report.
if complete_trials:
    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")
else:
    print("No trial completed; there is no best trial to report.")

[32m[I 2023-04-09 22:33:11,332][0m A new study created in memory with name: no-name-8550b939-9512-421f-acd0-0d8fc94cf439[0m


qt0.37 - eta: 0
qt0.79 - eta: 0
qt0.45 - eta: 0
qt0.84 - eta: 0
qt0.54 - eta: 0
qt0.43 - eta: 0
qt0.44 - eta: 0
qt0.47 - eta: 0
qt0.67 - eta: 0
qt0.01 - eta: 0
qt0.78 - eta: 0
qt0.56 - eta: 0
qt0.82 - eta: 0
qt0.89 - eta: 0
qt0.88 - eta: 0
qt0.43 - eta: 0
qt0.94 - eta: 0
qt0.36 - eta: 0
qt0.2 - eta: 0
qt0.08 - eta: 0
Applying thresholds ... 
Adding patterns with no degree of freedom ... 
Applying thresholds ... 
Adding patterns with no degree of freedom ... 
Applying thresholds ... 
Adding patterns with no degree of freedom ... 
Applying thresholds ... 
Applying thresholds ... 
Adding patterns with no degree of freedom ... Applying thresholds ... Adding patterns with no degree of freedom ... 


Adding patterns with no degree of freedom ... 
Applying thresholds ... 
Adding patterns with no degree of freedom ... 
Applying thresholds ... 
Applying thresholds ... Adding patterns with no degree of freedom ... 

Adding patterns with no degree of freedom ... 
Applying thresholds ... Applying 

In [None]:
# optuna.visualization.plot_pareto_front(study, target_names=["loss", "accuracy"])

In [None]:
# optuna.visualization.plot_param_importances(study, target=lambda t: t.values[0], target_name="accuracy")

In [None]:
# Tabulate every trial of the study, best objective value first.
trials_frame = study.trials_dataframe()
df_studys = trials_frame.sort_values(by='value', ascending=False)

In [None]:
# Persist the sorted trial table; the file name encodes the sampler class,
# the run prefix and the neuron count.
results_name = f'optunaResults-{type(sampler).__name__}-{PREFIX}-{NUM_NEURONS}.csv'
df_studys.to_csv(path_trainingModels / results_name, index=False)

In [None]:
# Index the table by trial number. Guarded so the cell can be re-run: once
# 'number' is the index it is no longer a column and set_index would raise.
if df_studys.index.name != 'number':
    df_studys = df_studys.set_index('number')

In [None]:
# Preview the five best-scoring trials.
df_studys.head(n=5)

In [None]:
import plotly.express as px

# Illustration on synthetic data: where the best trial's quantile falls on a
# standard-normal sample. The generator is seeded so the figure is reproducible.
rng = np.random.default_rng(SEED)
y = rng.standard_normal(10_000)

fig = px.histogram(y)

std_ = np.std(y)
# df_studys is sorted best-first; pick the quantile by column name rather than
# by position, which silently breaks if the trials table gains extra columns.
best_quantile = df_studys['params_quantile'].iloc[0]
qt_ = np.quantile(y, best_quantile)

# Green dashed line: the selected quantile; blue dashed lines: 1 and 2 std.
fig.add_vline(qt_, line_width=3, line_dash="dash", line_color="green")

fig.add_vline(std_, line_width=3, line_dash="dash", line_color="blue")
fig.add_vline(std_ * 2, line_width=3, line_dash="dash", line_color="blue")

# Shade the regions below (red) and above (green) the selected quantile.
fig.add_vrect(x0=y.min(), x1=qt_, line_width=0, fillcolor="red", opacity=0.2)
fig.add_vrect(x0=qt_, x1=y.max(), line_width=0, fillcolor="green", opacity=0.2)

In [None]:
# # params: L2, L1, bs, bn, drop, optim
# #          3,  3,  4,  2,    6,     2 = 864

# # sampler = TPESampler(seed=SEED) # Default
# # sampler = CmaEsSampler(seed=SEED)
# sampler = RandomSampler(seed=SEED)

# study = optuna.create_study(direction="maximize", sampler=sampler)
# study.optimize(objective, n_trials=total_trials, timeout=total_time)

# pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
# complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# print("Study statistics: ")
# print("  Number of finished trials: ", len(study.trials))
# print("  Number of pruned trials: ", len(pruned_trials))
# print("  Number of complete trials: ", len(complete_trials))

# print("Best trial:")
# trial = study.best_trial
# # trial = study.best_trials

# print("  Value: ", trial.value)

# print("  Params: ")
# for key, value in trial.params.items():
#     print("    {}: {}".format(key, value))

In [None]:
# optuna.visualization.plot_pareto_front(study, target_names=["loss", "accuracy"])

In [None]:
# optuna.visualization.plot_param_importances(
#     study, target=lambda t: t.values[0], target_name="accuracy"
# )

In [None]:
# df_studys = study.trials_dataframe().sort_values('value', ascending=False)

In [None]:
# df_studys.to_csv(path_trainingModels / f'optunaResults_{type(sampler).__name__}.csv' ,index=False)

In [None]:
# df_studys = df_studys.set_index('number')

In [None]:
# df_studys.head()

In [None]:
# # params: L2, L1, bs, bn, drop, optim
# #          3,  3,  4,  2,    6,     2 = 864

# # sampler = TPESampler(seed=SEED) # Default
# sampler = CmaEsSampler(seed=SEED)
# # sampler = RandomSampler(seed=SEED)

# study = optuna.create_study(direction="maximize", sampler=sampler)
# study.optimize(objective, n_trials=total_trials, timeout=total_time)

# pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
# complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# print("Study statistics: ")
# print("  Number of finished trials: ", len(study.trials))
# print("  Number of pruned trials: ", len(pruned_trials))
# print("  Number of complete trials: ", len(complete_trials))

# print("Best trial:")
# trial = study.best_trial
# # trial = study.best_trials

# print("  Value: ", trial.value)

# print("  Params: ")
# for key, value in trial.params.items():
#     print("    {}: {}".format(key, value))

In [None]:
# optuna.visualization.plot_pareto_front(study, target_names=["loss", "accuracy"])

In [None]:
# optuna.visualization.plot_param_importances(
#     study, target=lambda t: t.values[0], target_name="accuracy"
# )

In [None]:
# df_studys = study.trials_dataframe().sort_values('value', ascending=False)

In [None]:
# df_studys.to_csv(path_trainingModels / f'optunaResults_{type(sampler).__name__}.csv' ,index=False)

In [None]:
# df_studys = df_studys.set_index('number')

In [None]:
# df_studys.head()