# CWRU Experiments
## Imports

In [1]:
from copy import deepcopy

import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from skorch.callbacks import PassthroughScoring, ProgressBar
import torch
from torch.utils.tensorboard import SummaryWriter

from keras.datasets import mnist
import tensorflow

import lib.transformers as tf
from lib.models import Ganomaly1d, Ganomaly2d, GanomalyFE, GanomalyNet
from lib.visualization import GANomalyBoard, rename_tensorboard_key

In [2]:
# Read the two CWRU parquet shards and stack them row-wise into a single frame.
# The original index labels of both shards are kept as they are.
cwruData0, cwruData1 = (
    pd.read_parquet(shard_path)
    for shard_path in ('data/cwru0.parquet', 'data/cwru1.parquet')
)

cwruData = pd.concat([cwruData0, cwruData1])

## Settings

In [3]:
# Fix PyTorch's global random seed so repeated runs of the notebook are comparable.
torch.manual_seed(0)

<torch._C.Generator at 0x273f917c9b0>

## Initializing Model(s)

In [4]:
def build_model(model, isize, max_epochs, directory, needs_feature_engineering = False, ngpu = 0, nz = 600, ndf = 64, ngf = 64, nc = 1, batch_size = 16, lr = 0.0001, beta1 = 0.5, beta2 = 0.999, workers = 2):
    """Construct a ``GanomalyNet`` around ``model`` with the notebook's standard training setup.

    Both variants share the same device selection, criterion, optimizers,
    data-loader settings and callbacks; they only differ in which ``module__*``
    arguments are passed on.

    Parameters
    ----------
    model
        Module class handed to ``GanomalyNet`` as its first positional argument.
    isize
        Passed on as ``module__isize``.
    max_epochs
        Passed on as ``max_epochs``.
    directory : str
        Name appended to ``'runs/'`` to form the TensorBoard log directory.
    needs_feature_engineering : bool
        When true, only ``module__isize`` and ``module__ngpu`` are passed to the
        module; ``nz``, ``ndf``, ``ngf`` and ``nc`` are ignored and no
        ``w_app`` / ``w_lambda`` values are set.
    ngpu, nz, ndf, ngf, nc
        Passed on as ``module__ngpu``, ``module__nz``, ``module__ndf``,
        ``module__ngf`` and ``module__nc``.
    batch_size
        Passed on as ``batch_size``.
    lr, beta1, beta2
        Learning rate and ``betas`` tuple used for both Adam optimizers
        (generator and discriminator).
    workers
        Passed on as ``num_workers`` for the train and validation iterators.

    Returns
    -------
    GanomalyNet
        The newly constructed (not yet fitted) net.

    Notes
    -----
    A ``SummaryWriter`` for ``'runs/' + directory`` is created as part of this
    call, i.e. at build time rather than at fit time.
    """
    # Module arguments common to every architecture.
    module_params = {
        'module__isize': isize,
        'module__ngpu': ngpu,
    }

    if not needs_feature_engineering:
        module_params.update({
            'module__nz': nz,
            'module__ndf': ndf,
            'module__ngf': ngf,
            'module__nc': nc,
            # NOTE(review): fixed loss weights; their exact meaning is defined by the module.
            'module__w_app': 30,
            'module__w_lambda': 30/31,
        })

    # Expose every partial loss as its own score and mirror them to TensorBoard.
    loss_names = ['loss_dis', 'loss_gen', 'loss_gen_fra', 'loss_gen_app', 'loss_gen_lat']
    callbacks = [PassthroughScoring(loss_name, on_train=True) for loss_name in loss_names]
    callbacks.append(
        GANomalyBoard(SummaryWriter(log_dir= 'runs/' + directory), key_mapper = rename_tensorboard_key, close_after_train = False)
    )

    output_model = GanomalyNet(
        model,
        **module_params,

        device = torch.device("cuda:0") if torch.cuda.is_available() else 'cpu',

        criterion=torch.nn.BCELoss,

        optimizer_gen=torch.optim.Adam,
        optimizer_gen__lr=lr,
        optimizer_gen__betas=(beta1, beta2),

        optimizer_dis=torch.optim.Adam,
        optimizer_dis__lr=lr,
        optimizer_dis__betas=(beta1, beta2),

        batch_size=batch_size,
        max_epochs=max_epochs,

        train_split=False,  # not implemented
        iterator_train__shuffle=True,
        iterator_train__num_workers=workers,
        iterator_valid__num_workers=workers,

        callbacks=callbacks,
    )

    return output_model

## Data Preparation
Building a pipeline of custom transformers to fetch and preprocess CWRU data.

In [5]:
# Shared preprocessing: select the fan-end and drive-end columns of the
# 12 kHz 'Normal Baseline' rows, then flatten, cut into chunks of 3136 and flatten again.
common_preprocessing = Pipeline(
    steps=[
        (
            'DataSelector',
            tf.DataSelector(
                columns = ['fanEndData', 'driveEndData'],
                column_values = {'condition': ['Normal Baseline'], 'sampleRate': [12000]},
            ),
        ),
        ('ArrayFlattener', tf.ArrayFlattener()),
        ('ArrayChunker', tf.ArrayChunker(3136)),
        ('ArrayFlattener2', tf.ArrayFlattener()),
    ]
)

In [6]:
# Run the shared preprocessing once, then draw 400 training and 400 test samples.
preprocessed_samples = common_preprocessing.transform(cwruData)
train, test = train_test_split(
    preprocessed_samples,
    train_size = 400,
    test_size = 400,
    random_state = 0,
)

  X_ = np.array(X_)


In [7]:
max_epochs = 50

# One net per input representation; the last string is the TensorBoard run directory.
ganomaly_timeseries = build_model(Ganomaly1d, 3136, max_epochs, 'timeseries')
ganomaly_stacked_ts = build_model(Ganomaly2d, 56, max_epochs, 'stacked_timeseries103')
ganomaly_fft = build_model(Ganomaly1d, 1568, max_epochs, 'fourier_transform2')
ganomaly_stft = build_model(Ganomaly2d, 56, max_epochs, 'short_term_fourier2')
ganomaly_fe = build_model(GanomalyFE, 4, max_epochs, 'feature_extraction2', True)

# NOTE(review): the step called 'StandardScaler' actually holds a MinMaxScaler in every
# pipeline below. The key is kept as-is because it is part of the parameter names.

# Raw time series as a single 1 x 3136 channel.
model_timeseries = Pipeline(
    steps=[
        ('StandardScaler', MinMaxScaler()),
        ('ArrayReshaper', tf.ArrayReshaper((1, 3136))),
        ('Model', ganomaly_timeseries),
    ]
)

# Time series folded into a 1 x 56 x 56 array.
model_stacked_ts = Pipeline(
    steps=[
        ('StandardScaler', MinMaxScaler()),
        ('ArrayReshaper', tf.ArrayReshaper((1, 56, 56))),
        ('Model', ganomaly_stacked_ts),
    ]
)

# Extracted features reshaped to 1 x 4 x 4; this pipeline has no scaling step.
model_fe = Pipeline(
    steps=[
        ('FeatureExtractor', tf.FeatureExtractor(axis = 1)),
        ('ArrayReshaper', tf.ArrayReshaper((1, 4, 4))),
        ('Model', ganomaly_fe),
    ]
)

# Short-time Fourier transform, scaled in flat form and then reshaped to 1 x 56 x 56.
model_stft = Pipeline(
    steps=[
        ('ArraySTFT', tf.ArraySTFT()),
        # Passed as the plain int 3136 (the original "(3136)" is not a 1-tuple).
        ('ArrayReshaper', tf.ArrayReshaper(3136)),
        ('StandardScaler', MinMaxScaler()),
        ('ArrayReshaper2', tf.ArrayReshaper((1, 56, 56))),
        ('Model', ganomaly_stft),
    ]
)

# Fourier transform as a single 1 x 1568 channel.
model_fft = Pipeline(
    steps=[
        ('ArrayFFT', tf.ArrayFFT()),
        ('StandardScaler', MinMaxScaler()),
        ('ArrayReshaper2', tf.ArrayReshaper((1, 1568))),
        ('Model', ganomaly_fft),
    ]
)

# None of the pipelines is fitted in this cell.

In [8]:
# model_stacked_ts is the pipeline used by the prediction cells further down,
# so it has to be fitted in this run as well.
model_stacked_ts.fit(train)

# Fit the time-series pipeline and display its final step (the trained net).
model_timeseries.fit(train).steps[-1][1]

KeyboardInterrupt: 

In [None]:
# set_params() modifies a pipeline in place, so work on a copy: common_preprocessing
# itself keeps selecting 'Normal Baseline' and the split cell stays safe to re-run.
outer_race_preprocessing = deepcopy(common_preprocessing).set_params(
    DataSelector__column_values = {'condition': ['Outer Race Fault'], 'sampleRate': [12000]}
)

# Score the first 400 outer-race-fault samples with the stacked time-series model.
outer_race_samples = outer_race_preprocessing.transform(cwruData)[:400]
error, X, fake, latent_i, latent_o = model_stacked_ts.predict_proba(outer_race_samples)

In [None]:
# Score the `test` split with the stacked time-series model.
# NOTE(review): latent_i and latent_o are reassigned here, replacing the values
# returned by the previous predict_proba call.
error2,Y,  Yfake, latent_i, latent_o = model_stacked_ts.predict_proba(test)

In [None]:
# Compare the two latent vectors of sample 11 (latent_i / latent_o as last assigned above).
latent_frame = pd.DataFrame(
    [latent_o[11].flatten(), latent_i[11].flatten()],
    index = ['latent_o', 'latent_i'],
).T
ax = sns.lineplot(data = latent_frame)
ax.set(title = 'latent_o vs. latent_i (sample 11)', xlabel = 'position in flattened vector', ylabel = 'value');

In [None]:
# Compare X[11] with fake[11] from the outer-race-fault prediction.
fault_frame = pd.DataFrame(
    [X[11].flatten(), fake[11].flatten()],
    index = ['X', 'fake'],
).T
ax = sns.lineplot(data = fault_frame)
ax.set(title = 'X vs. fake (outer race fault, sample 11)', xlabel = 'position in flattened sample', ylabel = 'value');

In [None]:
# Compare Y[12] with Yfake[12] from the prediction on the `test` split.
test_frame = pd.DataFrame(
    [Y[12].flatten(), Yfake[12].flatten()],
    index = ['Y', 'Yfake'],
).T
ax = sns.lineplot(data = test_frame)
ax.set(title = 'Y vs. Yfake (test split, sample 12)', xlabel = 'position in flattened sample', ylabel = 'value');

In [None]:
import seaborn as sns

# GridSearchCV

In [None]:
# Grid of loss weights: 0, 10, ..., 100 for each weight.
# The keys use the double-underscore "module__<name>" form, matching how build_model
# passes module arguments (e.g. module__w_app); with a single underscore the values
# would not be routed to the module.
# NOTE(review): confirm the module accepts w_fra and w_lat; build_model itself only
# sets w_app and w_lambda.
weight_grid = list(range(0, 101, 10))

search_parameters = {
    'module__w_fra': list(weight_grid),
    'module__w_app': list(weight_grid),
    'module__w_lat': list(weight_grid),
}

In [None]:
#ganomaly_gs = GridSearchCV(ganomaly, search_parameters, refit=False, cv=4)

In [None]:
#ganomaly_gs.fit(train)

## Training Visualization (best parameters)
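Training is logged to TensorBoard by the writer attached in `build_model`. The cells below build a labelled evaluation set from the CWRU data and score it with the trained stacked time-series model.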

In [None]:
def create_dataset(data, feature_columns, label_columns, sample_length = 0, **column_values): 
    """Turn selected rows of ``data`` into one chunk per row, with matching labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame; each cell of a feature column holds one sliceable signal.
    feature_columns : list
        Columns whose signals are cut into chunks.
    label_columns : list
        Columns copied next to every chunk. Must be a list, because
        ``['vibrationOrigin']`` is appended to it with ``+``.
    sample_length : int
        Chunk length. ``0`` (the default) or any non-positive value selects
        the chunk size 3136.
    **column_values
        ``column = [allowed values]`` filters; only rows whose ``column`` value
        is in the given list are kept. All filters are combined.

    Returns
    -------
    (pandas.Series, pandas.DataFrame)
        ``features`` holds one chunk per row (column ``vibrationData``);
        ``labels`` holds ``label_columns`` plus ``vibrationOrigin``, the name of
        the feature column each chunk was cut from. Both share a fresh 0..n-1 index.
    """
    chunk_size = sample_length if sample_length > 0 else 3136

    for column, values in column_values.items():
        data = data[data[column].isin(values)]

    # Features and labels are re-united by an index join further down. Give every
    # remaining row a unique index label first, otherwise repeated labels (e.g. from
    # concatenated frames) would pair chunks with the labels of other rows.
    data = data.reset_index(drop = True)

    features = data.loc[:, feature_columns]
    labels = data.loc[:, label_columns]

    features = features.dropna()

    # Element-wise chunking, written with apply/map instead of the deprecated applymap.
    # Incomplete trailing chunks are discarded (keep_rest = False).
    chunked_features = features.apply(
        lambda column: column.map(lambda signal: list(_chunk(signal, chunk_size, False)))
    )

    # One row per (source row, feature column), then one row per chunk.
    stacked_features = chunked_features.stack().explode()

    # Move the feature-column level of the index into a regular column.
    stacked_features = stacked_features.reset_index(level=[1])
    stacked_features = stacked_features.rename({0: 'vibrationData', 'level_1': 'vibrationOrigin'}, axis = 1)

    stacked_features = stacked_features.loc[:, ['vibrationData', 'vibrationOrigin']]

    # Attach the labels of the source row to each of its chunks.
    dataset = stacked_features.join(labels, how='left')

    dataset = dataset.reset_index(drop = True)

    features = dataset['vibrationData']
    labels = dataset.loc[:, label_columns + ['vibrationOrigin']]

    return features, labels

In [None]:
def _chunk(array, chunk_size, keep_rest):

    for position in range(0, len(array), chunk_size):
        result = array[position:position + chunk_size]

        if keep_rest:
            yield result
        else:
            if (len(result) == chunk_size):
                yield result

In [None]:
# Label columns carried along with every chunk.
LABEL_COLUMNS = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition']

# Normal-baseline chunks come from both sensors.
normal_features, normal_labels = create_dataset(
    cwruData,
    feature_columns = ['fanEndData', 'driveEndData'],
    label_columns = LABEL_COLUMNS,
    condition = ['Normal Baseline'],
    sampleRate = [12000],
)
# NOTE(review): this split is made independently of the `train` array the models are
# fitted on; confirm that the 451 normal test chunks do not overlap the training data.
(
    normal_features_train,
    normal_features_test,
    normal_labels_train,
    normal_labels_test,
) = train_test_split(normal_features, normal_labels, train_size = 400, test_size= 451, random_state = 0)

# Fault chunks use the fan-end sensor only.
ball_features, ball_labels = create_dataset(
    cwruData,
    feature_columns = ['fanEndData'],
    label_columns = LABEL_COLUMNS,
    condition = ['Ball Fault'],
    sampleRate = [12000],
)
inner_features, inner_labels = create_dataset(
    cwruData,
    feature_columns = ['fanEndData'],
    label_columns = LABEL_COLUMNS,
    condition = ['Inner Race Fault'],
    sampleRate = [12000],
)
outer_features, outer_labels = create_dataset(
    cwruData,
    feature_columns = ['fanEndData'],
    label_columns = LABEL_COLUMNS,
    condition = ['Outer Race Fault'],
    sampleRate = [12000],
)

# Evaluation set: all fault chunks plus the held-out normal chunks, in the same order
# for labels and features.
labels_test = pd.concat([ball_labels, inner_labels, outer_labels, normal_labels_test])
features_test = pd.concat([ball_features, inner_features, outer_features, normal_features_test])

# Score every chunk and store the score next to its labels.
evaluation_input = np.array(features_test.tolist())
predictions = model_stacked_ts.predict(evaluation_input)
result = labels_test.assign(predictions = predictions)

In [None]:
# Strip plot of the prediction per condition, one panel per motor load, on a log y-axis.
sns.set_style('whitegrid')
sns.set_context('paper')
# catplot returns a FacetGrid (one Axes per motorLoad value), hence the name `grid`.
grid = sns.catplot(data = result, y = 'predictions', x = 'condition', col = 'motorLoad', kind = 'strip')
grid.set(yscale = 'log');

In [None]:
# Mean, minimum and maximum prediction per bearing condition.
(
    result
    .groupby(['condition'])['predictions']
    .agg(['mean', 'min', 'max'])
)