# Hyperparameter Tuning
## Imports

In [None]:
import pandas as pd
import numpy as np

import lib.transformers as tfs
from sklearn.pipeline import Pipeline

from lib.models import Ganomaly1d, Ganomaly2d, GanomalyFE, GanomalyNet
from lib.visualization import GANomalyBoard, rename_tensorboard_key

from skorch.callbacks import PassthroughScoring, ProgressBar
import torch

from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

from keras.datasets import mnist
import tensorflow

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from lib.others import create_dataset

from lib.others import build_model
from lib.visualization import lineplot_comparison

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
cwruData0 = pd.read_parquet('data/cwru0.parquet')
cwruData1 = pd.read_parquet('data/cwru1.parquet')

cwruData = pd.concat([cwruData0, cwruData1])

## Settings

## Creating Datasets

In [None]:
normal_features, normal_labels = create_dataset(cwruData, feature_columns = ['fanEndData', 'driveEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Normal Baseline'], sampleRate = [12000])
normal_features_train, normal_features_test, normal_labels_train, normal_labels_test = train_test_split(normal_features, normal_labels, train_size = 400, test_size= 451)

ball_features, ball_labels = create_dataset(cwruData, feature_columns = ['fanEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Ball Fault'], sampleRate = [12000])
inner_features, inner_labels = create_dataset(cwruData, feature_columns = ['fanEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Inner Race Fault'], sampleRate = [12000])
outer_features, outer_labels = create_dataset(cwruData, feature_columns = ['fanEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Outer Race Fault'], sampleRate = [12000])


labels_test = pd.concat([ball_labels, inner_labels, outer_labels, normal_labels_test])
features_test = pd.concat([ball_features, inner_features, outer_features, normal_features_test])

normal_features_test = np.array(normal_features_test.to_list())
normal_features_train = np.array(normal_features_train.to_list())

# GridSearchCV

## Building and training the different Models

In [None]:
# model settings
n_z = 100
n_channels = 1
n_feature_maps = 64

fraud_weight = 1
appearant_weight = 1
latent_weight = 1
lambda_weight = 0.5

# training settings
device = torch.device("cuda:0") if torch.cuda.is_available() else 'cpu'
n_gpus = 0
workers = 2
batch_size = 16
max_epochs = 50
lr = 0.0001
beta1 = 0.5
beta2 = 0.999

# extra callbacks
callbacks = []

# run number
run = 1000

### Stacked Time Series

In [None]:
gan_stacked_time_series =\
build_model(
    model = Ganomaly2d,
    
    # model parameters
    input_size = 56, 
    n_z = n_z,
    n_channels = n_channels,
    n_fm_discriminator = n_feature_maps,  
    n_fm_generator = n_feature_maps,
    fraud_weight = fraud_weight,
    appearant_weight = appearant_weight, 
    latent_weight = latent_weight,
    lambda_weight = lambda_weight,
    
    # training parameters
    device = device,
    n_gpus = n_gpus,
    workers = workers,
    batch_size = batch_size,
    max_epochs = max_epochs, 
    lr = 0.0001,
    beta1 = 0.5,
    beta2 = 0.999, 
    
    # extra callbacks
    callbacks = callbacks
)

In [None]:
pipeline_stacked_time_series = \
Pipeline(steps=[
                ('reshaper', tfs.ArrayReshaper((1, 56, 56))),
                ('scaler', tfs.ArrayMinMaxScaler()),
                ('retyper', tfs.ArrayRetyper(np.float32)),
                ('model', gan_stacked_time_series)
               ])

In [None]:
search_space = [0, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90]

search_parameters = \
[
    {
    'model__module__fraud_weight': search_space,
    },
    {
    'model__module__appearant_weight': search_space,
    },
    {
    'model__module__latent_weight': search_space,
    }

]

In [None]:
ganomaly_gs = GridSearchCV(pipeline_stacked_time_series, search_parameters, refit=False, cv=4)

In [None]:
ganomaly_gs.fit(normal_features_train)

In [2]:
import joblib
test = joblib.load("ganomaly_gs.pkl")



In [3]:
test

GridSearchCV(cv=4,
             estimator=Pipeline(steps=[('reshaper',
                                        <lib.transformers.ArrayReshaper object at 0x00000204341743D0>),
                                       ('scaler',
                                        <lib.transformers.ArrayMinMaxScaler object at 0x000002043418EA90>),
                                       ('retyper',
                                        <lib.transformers.ArrayRetyper object at 0x000002043418EAF0>),
                                       ('model',
                                        <class 'lib.models.GanomalyNet'>[uninitialized](
  module=<class 'lib.models.Ganomaly2...
  module__n_channels=1,
  module__n_fm_discriminator=64,
  module__n_fm_generator=64,
  module__n_gpus=0,
  module__n_z=100,
))]),
             param_grid=[{'model__module__fraud_weight': [0, 1, 10, 20, 30, 40,
                                                          50, 60, 70, 80, 90]},
                         {'model__module__a

In [4]:
pd.concat([pd.DataFrame(test.cv_results_["params"]),pd.DataFrame(test.cv_results_["train_loss"], columns=["generator_loss"])],axis=1)

NameError: name 'pd' is not defined

In [None]:
test.cv_results_['rank_test_train_loss']

In [None]:
test.cv_results_['rank_test_generator_loss']

In [16]:
test.cv_results_["params"][test.cv_results_['rank_test_generator_loss'][0]]

{'model__module__fraud_weight': 70}

In [15]:
test.cv_results_["params"][test.cv_results_['mean_test_generator_loss'].argmax()]

{'model__module__latent_weight': 90}

In [10]:
test.cv_results_['mean_test_train_loss'].max()

-0.03009540494531393

In [14]:
test.cv_results_['mean_test_generator_loss'].argmax()

32

NameError: name 'ganomaly_gs' is not defined