# Hyperparameter Tuning
## Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

import lib.transformers as tfs
from sklearn.pipeline import Pipeline

from lib.models import Ganomaly1d, Ganomaly2d, GanomalyFE, GanomalyNet
from lib.visualization import GANomalyBoard, rename_tensorboard_key

from skorch.callbacks import PassthroughScoring, ProgressBar
import torch

from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

from keras.datasets import mnist
import tensorflow

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from lib.others import create_dataset

from lib.others import build_model
from lib.visualization import lineplot_comparison

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the two CWRU parquet files and stack them into a single frame.
cwruData0, cwruData1 = map(
    pd.read_parquet,
    ('data/cwru0.parquet', 'data/cwru1.parquet'),
)

cwruData = pd.concat((cwruData0, cwruData1))

## Settings

## Creating Datasets

In [None]:
# Label columns requested for every dataset below; each call receives its own
# copy of the list.
shared_label_columns = [
    'condition',
    'faultDiameter',
    'motorLoad',
    'relativeFaultPosition',
    'faultyBearingPosition',
]

# Normal-condition samples (fan end and drive end columns), split into a
# training part of 400 samples and a test part of 451 samples.
normal_features, normal_labels = create_dataset(
    cwruData,
    feature_columns=['fanEndData', 'driveEndData'],
    label_columns=list(shared_label_columns),
    condition=['Normal Baseline'],
    sampleRate=[12000],
)
(
    normal_features_train,
    normal_features_test,
    normal_labels_train,
    normal_labels_test,
) = train_test_split(
    normal_features,
    normal_labels,
    train_size=400,
    test_size=451,
)

# Faulty-condition samples (fan end column only), one dataset per fault type.
ball_features, ball_labels = create_dataset(
    cwruData,
    feature_columns=['fanEndData'],
    label_columns=list(shared_label_columns),
    condition=['Ball Fault'],
    sampleRate=[12000],
)
inner_features, inner_labels = create_dataset(
    cwruData,
    feature_columns=['fanEndData'],
    label_columns=list(shared_label_columns),
    condition=['Inner Race Fault'],
    sampleRate=[12000],
)
outer_features, outer_labels = create_dataset(
    cwruData,
    feature_columns=['fanEndData'],
    label_columns=list(shared_label_columns),
    condition=['Outer Race Fault'],
    sampleRate=[12000],
)


# Combined test set: every faulty sample plus the held-out normal samples.
# This has to happen before the array conversion below, because pd.concat
# receives normal_features_test while it is still a pandas object.
features_test = pd.concat([ball_features, inner_features, outer_features, normal_features_test])
labels_test = pd.concat([ball_labels, inner_labels, outer_labels, normal_labels_test])

# Convert the normal-condition splits into numpy arrays.
normal_features_train = np.array(normal_features_train.to_list())
normal_features_test = np.array(normal_features_test.to_list())

# GridSearchCV

## Building the time series model and optimizing it

In [None]:
# model settings

n_z = 600
n_channels = 1
n_feature_maps = 64

# all three loss weights start at the same value
adversarial_weight = contextual_weight = encoder_weight = 1
lambda_weight = 0.5

# training settings
# use the first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = 'cpu'
n_gpus = 0
workers = 2
batch_size = 16
max_epochs = 50
lr = 1e-4
beta1, beta2 = 0.5, 0.999

# extra callbacks
callbacks = list()

# run number
run = 1000

In [None]:
# model initialization
#
# Build the 1-D GANomaly estimator from the values defined in the
# "model settings" / "training settings" cell. Every tunable is passed by
# name so that editing the settings cell is the single source of truth;
# lr, beta1 and beta2 used to be repeated here as literals.

gan_time_series = build_model(
    model=Ganomaly1d,

    # model parameters
    input_size=3136,  # must match the sample length used by the pipeline's reshaper
    n_z=n_z,
    n_channels=n_channels,
    n_fm_discriminator=n_feature_maps,
    n_fm_generator=n_feature_maps,
    adversarial_weight=adversarial_weight,
    contextual_weight=contextual_weight,
    encoder_weight=encoder_weight,
    lambda_weight=lambda_weight,

    # training parameters
    device=device,
    n_gpus=n_gpus,
    workers=workers,
    batch_size=batch_size,
    max_epochs=max_epochs,
    lr=lr,
    beta1=beta1,
    beta2=beta2,

    # extra callbacks
    callbacks=callbacks,
)

In [None]:
# Preprocessing + model pipeline: the reshaper is configured with the target
# shape (1, 3136), the retyper with float32, and the last step is the
# GANomaly estimator built above.
# NOTE(review): exact reshaping/retyping semantics live in lib.transformers.
pipeline_time_series = Pipeline(
    [
        ('reshaper', tfs.ArrayReshaper((1, 3136))),
        ('retyper', tfs.ArrayRetyper(np.float32)),
        ('model', gan_time_series),
    ]
)

In [None]:
# grid search search space

# Candidate weights: 0, 1, then every multiple of ten from 10 up to 90.
search_space = [0, 1, *range(10, 100, 10)]

# One single-parameter grid per loss weight, so each weight is varied on its
# own rather than in combination with the others.
search_parameters = [
    {parameter_name: search_space}
    for parameter_name in (
        'model__module__adversarial_weight',
        'model__module__contextual_weight',
        'model__module__encoder_weight',
    )
]

In [None]:
# 4-fold grid search over the single-weight grids; no refit on the full data
# afterwards, since only the per-split scores are analysed.
ganomaly_gs = GridSearchCV(
    estimator=pipeline_time_series,
    param_grid=search_parameters,
    cv=4,
    refit=False,
    verbose=4,
)

In [None]:
# Run the grid search on the normal-condition training samples only
# (no target is passed).
ganomaly_gs.fit(normal_features_train)

## Visualize and save the results

In [None]:
# Global plot defaults: 12x6 figures on seaborn's dark grid theme.
sns.set(
    style='darkgrid',
    rc={'figure.figsize': (12, 6)},
)

In [None]:
import pandas as pd

# Collect the per-split grid-search scores into one long-format frame with
# one row per (parameter setting, cross-validation split) and the columns
# 'hyperparameter', 'value', 'generator_loss' and 'train_loss'.

# Every entry of cv_results_['params'] is read as a single (name, value)
# pair, matching the one-parameter grids in search_parameters. This frame is
# the same for every split, so it is built (and its names shortened) once
# instead of once per loop iteration.
param_df = pd.DataFrame(
    [next(iter(entry.items())) for entry in ganomaly_gs.cv_results_['params']],
    columns=['hyperparameter', 'value'],
)
# regex=False: plain substring removal, independent of the pandas version's
# default for the `regex` argument.
param_df['hyperparameter'] = param_df['hyperparameter'].str.replace('model__module__', '', regex=False)

# Take the number of splits from the fitted search rather than hard-coding
# it, so this cell stays correct if `cv` is changed.
split_frames = []
for split in range(ganomaly_gs.n_splits_):
    # abs() is kept from the original code; presumably the scores can be
    # reported with a negative sign -- verify against the model's scoring.
    scores_df = pd.DataFrame({
        'generator_loss': abs(ganomaly_gs.cv_results_[f'split{split}_test_generator_loss']),
        'train_loss': abs(ganomaly_gs.cv_results_[f'split{split}_test_train_loss']),
    })
    split_frames.append(pd.concat([param_df, scores_df], axis=1))

# Single concat at the end; the index restarts at 0 for every split, exactly
# as with the previous incremental concatenation.
results_df = pd.concat(split_frames)

results_df

In [None]:
# Train loss per weight value, one line per hyperparameter, with error bars
# across the cross-validation splits.
train_loss_figure, train_loss_ax = plt.subplots()
train_loss_ax = sns.lineplot(
    data=results_df,
    x='value',
    y='train_loss',
    hue='hyperparameter',
    err_style='bars',
    ax=train_loss_ax,
)

# Weight value of the (hyperparameter, value) pair with the lowest mean train
# loss; idxmin yields that pair and element 1 is the value.
weight = results_df.groupby(['hyperparameter', 'value'])['train_loss'].mean().idxmin()[1]
train_loss_ax.axvline(weight, ls='--', color='grey')

train_loss_ax.set(
    xlabel='Weight',
    ylabel='Train Loss',
    title='Train Loss in Relation to Weights',
)

# Prettify the legend: "adversarial_weight" -> "Adversarial Weight".
legend = train_loss_ax.legend_
legend.set_title('Hyperparameter')
for label in legend.get_texts():
    label.set_text(label.get_text().title().replace('_', ' '))

train_loss_figure.savefig('data/results/hyperparameter-tuning_train-loss.png', dpi=330, bbox_inches='tight')

In [None]:
# Generator loss per weight value, one line per hyperparameter, with error
# bars across the cross-validation splits.
generator_loss_figure, generator_loss_ax = plt.subplots()
generator_loss_ax = sns.lineplot(
    data=results_df,
    x='value',
    y='generator_loss',
    hue='hyperparameter',
    err_style='bars',
    ax=generator_loss_ax,
)

# Weight value of the (hyperparameter, value) pair with the lowest mean
# generator loss; idxmin yields that pair and element 1 is the value.
weight = results_df.groupby(['hyperparameter', 'value'])['generator_loss'].mean().idxmin()[1]
generator_loss_ax.axvline(weight, ls='--', color='grey')

generator_loss_ax.set(
    xlabel='Weight',
    ylabel='Generator Loss',
    title='Generator Loss in Relation to Weights',
)

# Prettify the legend: "adversarial_weight" -> "Adversarial Weight".
legend = generator_loss_ax.legend_
legend.set_title('Hyperparameter')
for label in legend.get_texts():
    label.set_text(label.get_text().title().replace('_', ' '))

generator_loss_figure.savefig('data/results/hyperparameter-tuning_generator-loss.png', dpi=330, bbox_inches='tight')