# CWRU Experiments
## Imports

In [1]:
import pandas as pd
import numpy as np

import lib.transformers as tf
from sklearn.pipeline import Pipeline

from lib.model import Ganomaly, GanomalyNet
from lib.visualization import GANomalyBoard, rename_tensorboard_key

from skorch.callbacks import PassthroughScoring
import torch

from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

from keras.datasets import mnist
import tensorflow

In [2]:
# Read the two CWRU parquet files (in order) and stack them into one DataFrame.
cwru_paths = ['data/cwru0.parquet', 'data/cwru1.parquet']
cwruData0, cwruData1 = (pd.read_parquet(path) for path in cwru_paths)

cwruData = pd.concat([cwruData0, cwruData1])

## Settings

In [3]:
# Seed PyTorch's global random number generator so runs are repeatable.
torch.manual_seed(0)
# Always build a torch.device: previously the CPU fallback was the bare string
# 'cpu', so `device` changed type depending on whether CUDA was available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Initializing Model(s)

In [4]:
# --- Network dimensions (forwarded to the module as module__* arguments) ---
isize = 32   # image size handed to the model
nc = 1       # number of channels
nz = 100     # size of the latent z vector
ngf = 64     # units of generator
ndf = 64     # units of discriminator
ngpu = 1

# --- Optimisation ---
batch_size = 32
lr = 2e-4
beta1 = 0.5  # for adam
max_epochs = 5

# --- Data loading ---
workers = 2  # for dataloader

In [5]:
# Loss entries that are passed through as training scores, one callback each.
tracked_losses = ('loss_dis', 'loss_gen', 'loss_gen_fra', 'loss_gen_app', 'loss_gen_lat')
# Both Adam optimizers share the same beta pair.
adam_betas = (beta1, 0.999)

ganomaly = GanomalyNet(
    Ganomaly,

    # Arguments forwarded to the Ganomaly module.
    module__isize=isize,
    module__nz=nz,
    module__ndf=ndf,
    module__ngf=ngf,
    module__nc=nc,
    module__ngpu=ngpu,
    module__w_lat=1,

    criterion=torch.nn.BCELoss,

    # Generator optimizer.
    optimizer_gen=torch.optim.Adam,
    optimizer_gen__lr=lr,
    optimizer_gen__betas=adam_betas,

    # Discriminator optimizer (learning rate ten times smaller than the generator's).
    optimizer_dis=torch.optim.Adam,
    optimizer_dis__lr=0.00002,
    optimizer_dis__betas=adam_betas,

    batch_size=batch_size,
    # NOTE: literal 100 here, not the `max_epochs` variable from the config cell.
    max_epochs=100,

    train_split=False,  # not implemented
    iterator_train__shuffle=True,
    iterator_train__num_workers=workers,
    iterator_valid__num_workers=workers,

    callbacks=[
        PassthroughScoring(loss_name, on_train=True)
        for loss_name in tracked_losses
    ],
)

## Data Preparation
Building a pipeline of custom transformers to fetch and preprocess CWRU data.

In [6]:
image_size = 56

# Columns to extract and the row filter (normal-baseline recordings, sampleRate 12000).
signal_columns = ['fanEndData', 'driveEndData']
normal_baseline_filter = {'condition': ['Normal Baseline'], 'sampleRate': [12000]}

# Each chunk holds image_size**2 values and is reshaped to (1, image_size, image_size).
selection_steps = [
    ('DataSelector', tf.DataSelector(columns=signal_columns, column_values=normal_baseline_filter)),
    ('ArrayFlattener', tf.ArrayFlattener()),
    #('ArrayEqualizer', tf.ArrayEqualizer()),
    ('ArrayChunker', tf.ArrayChunker(image_size ** 2)),
    ('ArrayFlattener2', tf.ArrayFlattener()),
    ('ArrayReshaper', tf.ArrayReshaper((1, image_size, image_size))),
]
selection_pipeline = Pipeline(steps=selection_steps)

chunked_normal_data = selection_pipeline.transform(cwruData)

  X_ = np.array(X_)


Train-test-split of the normal CWRU data.

In [7]:
# Draw fixed-size train and test subsets from the normal chunks (seeded split).
train, test = train_test_split(
    chunked_normal_data,
    train_size=400,
    test_size=451,
    random_state=0,
)

In [8]:
# Number of items returned by the selection pipeline for the normal-baseline data.
len(chunked_normal_data)

1078

## GridSearchCV

In [9]:
# Candidate values tried for every loss weight: 0, 10, ..., 100.
weight_grid = range(0, 101, 10)

# Keys use the double-underscore prefix ('module__<arg>') so the values are
# routed to the module's constructor arguments, matching `module__w_lat` in the
# GanomalyNet construction. The previous single-underscore keys
# ('module_w_fra', ...) did not address any module argument.
search_parameters = {
    'module__w_fra': list(weight_grid),
    'module__w_app': list(weight_grid),
    'module__w_lat': list(weight_grid),
}

[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

In [10]:
# 4-fold grid search over the loss weights; refit is disabled, so no final
# estimator is retrained on the full data after the search.
ganomaly_gs = GridSearchCV(
    estimator=ganomaly,
    param_grid=search_parameters,
    cv=4,
    refit=False,
)

In [11]:
#ganomaly_gs.fit(train)

## Training Visualization (best parameters)
Adding a TensorBoard for the visualization of the training.

In [12]:
# Attach the TensorBoard callback only once. Without the guard, every re-run of
# this cell appended another GANomalyBoard (and created another SummaryWriter)
# on the same net, so training would be logged several times.
if not any(isinstance(callback, GANomalyBoard) for callback in ganomaly.callbacks):
    ganomaly_board = GANomalyBoard(SummaryWriter(), key_mapper = rename_tensorboard_key, close_after_train = False)
    ganomaly.callbacks += [ganomaly_board]

# Show the callbacks currently registered on the net.
ganomaly.callbacks

[<skorch.callbacks.scoring.PassthroughScoring at 0x2cdf23bd148>,
 <skorch.callbacks.scoring.PassthroughScoring at 0x2cdf23bd188>,
 <skorch.callbacks.scoring.PassthroughScoring at 0x2cdf23bd1c8>,
 <skorch.callbacks.scoring.PassthroughScoring at 0x2cdf23bd208>,
 <skorch.callbacks.scoring.PassthroughScoring at 0x2cdf23bd248>,
 <lib.visualization.GANomalyBoard at 0x2cdf22ddd48>]

In [13]:
# Train the net on the `train` subset; no target array is passed.
ganomaly.fit(train)

  epoch    loss_dis    loss_gen    loss_gen_app    loss_gen_fra    loss_gen_lat    train_loss      dur
-------  ----------  ----------  --------------  --------------  --------------  ------------  -------
      1      0.6467      1.7578          0.1721          0.7455          0.8402        2.4046  15.3742
      2      0.6364      0.9271          0.1298          0.5776          0.2197        1.5636  14.7292
      3      0.6221      0.6477          0.0873          0.4490          0.1114        1.2698  16.4930
      4      0.6063      0.4939          0.0730          0.3510          0.0699        1.1002  16.5689
      5      0.5897      0.3925          0.0668          0.2766          0.0491        0.9822  16.6203
      6      0.5723      0.3217          0.0639          0.2202          0.0377        0.8940  18.3905
      7      0.5537      0.2714          0.0626          0.1772          0.0316        0.8251  17.5113
      8      0.5326      0.2335          0.0611          0.1443          

<class 'lib.model.GanomalyNet'>[initialized](
  module_=Ganomaly(
    (l_fra): BCELoss()
    (l_app): L1Loss()
    (l_dis): L1Loss()
    (discriminator): NetD(
      (features): Sequential(
        (0): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
        (1): LeakyReLU(negative_slope=0.2, inplace=True)
        (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
        (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): LeakyReLU(negative_slope=0.2, inplace=True)
        (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
        (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (7): LeakyReLU(negative_slope=0.2, inplace=True)
      )
      (classifier): Sequential(
        (0): Conv2d(256, 100, kernel_size=(4, 4), stride=(1, 1), bias=False)
        (Sigmoid): Sigmoid()
      )
    )
    (generator):

In [14]:
# Mean of the net's predictions over the held-out normal `test` subset.
test_predictions = ganomaly.predict(test)
test_predictions.mean()

0.028867852

In [16]:
# NOTE: set_params changes `selection_pipeline` in place, so after this cell the
# shared pipeline selects 'Outer Race Fault' rows instead of 'Normal Baseline'.
outer_race_filter = {'condition': ['Outer Race Fault'], 'sampleRate': [12000]}
selection_pipeline.set_params(DataSelector__column_values=outer_race_filter)
outer_race_chunks = selection_pipeline.transform(cwruData)

# Predictions for the first 200 outer-race-fault chunks.
ganomaly.predict(outer_race_chunks[:200])

array([0.1594355 , 0.1406145 , 0.14918292, 0.14804487, 0.14178324,
       0.14477463, 0.13752337, 0.14540938, 0.15198371, 0.14790529,
       0.15870446, 0.16107456, 0.1543032 , 0.14570117, 0.16333222,
       0.15785691, 0.15391228, 0.14082545, 0.15375912, 0.15185909,
       0.14997056, 0.1548337 , 0.15414204, 0.15421586, 0.14926204,
       0.15465142, 0.15416095, 0.16461125, 0.15822984, 0.15712994,
       0.15597245, 0.15309568, 0.14750645, 0.13814154, 0.15503491,
       0.15386963, 0.15916605, 0.16225797, 0.41035908, 0.3862005 ,
       0.42294383, 0.4652704 , 0.39976633, 0.33235806, 0.40886942,
       0.42219275, 0.37192416, 0.35318452, 0.4393332 , 0.47563294,
       0.38049513, 0.38787317, 0.44683915, 0.45800355, 0.36684597,
       0.38384235, 0.4708737 , 0.43070483, 0.35233247, 0.38967684,
       0.4623214 , 0.4322304 , 0.3431528 , 0.48618048, 0.5155954 ,
       0.45486328, 0.3678665 , 0.47173417, 0.42963988, 0.35994455,
       0.3420385 , 0.44578534, 0.40666974, 0.33889282, 0.38821