# CWRU Experiments
## Imports

In [1]:
import pandas as pd
import numpy as np

import lib.transformers as tf
from sklearn.pipeline import Pipeline

from lib.models import Ganomaly1d, Ganomaly2d, GanomalyFE, GanomalyNet
from lib.visualization import GANomalyBoard, rename_tensorboard_key

from skorch.callbacks import PassthroughScoring, ProgressBar
import torch

from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

from keras.datasets import mnist
import tensorflow

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from lib.others import create_dataset

from lib.others import build_model
from lib.visualization import lineplot_comparison

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
cwruData0 = pd.read_parquet('data/cwru0.parquet')
cwruData1 = pd.read_parquet('data/cwru1.parquet')

cwruData = pd.concat([cwruData0, cwruData1])

## Settings

In [3]:
torch.manual_seed(0)

<torch._C.Generator at 0x2005513a8f0>

## Creating Datasets

In [4]:
normal_features, normal_labels = create_dataset(cwruData, feature_columns = ['fanEndData', 'driveEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Normal Baseline'], sampleRate = [12000])
normal_features_train, normal_features_test, normal_labels_train, normal_labels_test = train_test_split(normal_features, normal_labels, train_size = 400, test_size= 451, random_state = 0)

ball_features, ball_labels = create_dataset(cwruData, feature_columns = ['fanEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Ball Fault'], sampleRate = [12000])
inner_features, inner_labels = create_dataset(cwruData, feature_columns = ['fanEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Inner Race Fault'], sampleRate = [12000])
outer_features, outer_labels = create_dataset(cwruData, feature_columns = ['fanEndData'], label_columns = ['condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition'], condition = ['Outer Race Fault'], sampleRate = [12000])


labels_test = pd.concat([ball_labels, inner_labels, outer_labels, normal_labels_test])
features_test = pd.concat([ball_features, inner_features, outer_features, normal_features_test])

normal_features_test = np.array(normal_features_test.to_list())
normal_features_train = np.array(normal_features_train.to_list())

## Data Preparation
Building a pipeline of custom transformers to fetch and preprocess CWRU data.

In [5]:
max_epochs = 5

# ganomaly_timeseries = build_model(Ganomaly1d, 3136, max_epochs, 'timeseries11', plot_type = 'lineplot', plot_shape = 3136, plot_latent_shape =600, n_samples = 4)
# ganomaly_stacked_ts = build_model(Ganomaly2d, 56, max_epochs, 'stacked_timeseries11', plot_type = 'lineplot', plot_shape = 3136, plot_latent_shape =600, n_samples = 4)
ganomaly_fft = build_model(Ganomaly1d, 1568, max_epochs, 'fourier_transform12', plot_type = 'lineplot', plot_shape = 1568, plot_latent_shape =600, n_samples = 4)
# ganomaly_stft = build_model(Ganomaly2d, 56, max_epochs, 'short_term_fourier11', plot_type = 'image', plot_shape = 56, plot_latent_shape =600, n_samples = 36)
# ganomaly_fe = build_model(GanomalyFE, 4, max_epochs, 'feature_extraction11', needs_feature_engineering = True, plot_type = 'barplot', plot_shape = 16, n_samples = 4, plot_latent_shape = 32)


# model_stft = \
# Pipeline(steps=[
#                 ('ArraySTFT', tf.ArraySTFT()),
#                 ('StandardScaler', MinMaxScaler()),
#                 ('ArrayReshaper2', tf.ArrayReshaper((1, 56, 56))),
#                 ('ArrayRetyper', tf.ArrayRetyper(np.float32)),
#                 ('Model', ganomaly_stft)
#                ])


# model_timeseries = \
# Pipeline(steps=[
#                 ('StandardScaler', MinMaxScaler()),
#                 ('ArrayReshaper', tf.ArrayReshaper((1, 3136))),
#                 ('ArrayRetyper', tf.ArrayRetyper(np.float32)),
#                 ('Model', ganomaly_timeseries)
#                ])

# model_stacked_ts = \
# Pipeline(steps=[
#                 ('StandardScaler', MinMaxScaler()),
#                 ('ArrayReshaper', tf.ArrayReshaper((1, 56, 56))),
#                 ('ArrayRetyper', tf.ArrayRetyper(np.float32)),
#                 ('Model', ganomaly_stacked_ts)
#                ])

# model_fe = \
# Pipeline(steps=[
#                 ('FeatureExtractor', tf.FeatureExtractor()),
#                 ('StandardScaler', MinMaxScaler()),
#                 ('ArrayReshaper', tf.ArrayReshaper((1, 4, 4))),
#                 ('ArrayRetyper', tf.ArrayRetyper(np.float32)),
#                 ('Model', ganomaly_fe)
#               ])


model_fft = \
Pipeline(steps=[
                ('ArrayRealFFT', tf.ArrayRealFFT()),
                ('ArrayReshaper', tf.ArrayReshaper((1, 1568))),
                ('StandardScaler', tf.ArrayMinMaxScaler()),
                ('ArrayRetyper', tf.ArrayRetyper(np.float32)),
                ('Model', ganomaly_fft)
               ])

# model_timeseries.fit(normal_features_train)
# model_stacked_ts.fit(normal_features_train)
# model_fe.fit(normal_features_train)
# model_stft.fit(normal_features_train)
model_fft.fit(normal_features_train)

  epoch    appearant_loss    discriminator_loss    fraud_loss    generator_loss    latent_loss    train_loss      dur
-------  ----------------  --------------------  ------------  ----------------  -------------  ------------  -------
      1            [36m0.0401[0m                [32m0.3508[0m        [35m0.7457[0m            [31m2.1757[0m         [94m0.2281[0m        [36m2.5265[0m  10.9921
      2            [36m0.0128[0m                [32m0.0872[0m        0.7608            [31m1.1912[0m         [94m0.0454[0m        [36m1.2784[0m  12.4826
      3            [36m0.0103[0m                [32m0.0513[0m        0.7658            [31m1.0986[0m         [94m0.0240[0m        [36m1.1499[0m  12.1865
      4            [36m0.0093[0m                [32m0.0382[0m        0.7680            [31m1.0644[0m         [94m0.0165[0m        [36m1.1026[0m  10.5130
      5            [36m0.0089[0m                [32m0.0310[0m        0.7695            [31m1.0487

Pipeline(steps=[('ArrayRealFFT',
                 <lib.transformers.ArrayRealFFT object at 0x000002005CEA38E0>),
                ('ArrayReshaper',
                 <lib.transformers.ArrayReshaper object at 0x000002005CEA39D0>),
                ('StandardScaler',
                 <lib.transformers.ArrayMinMaxScaler object at 0x000002005CEA3700>),
                ('ArrayRetyper',
                 <lib.transformers.ArrayRetyper object at 0x000002005CEA3790>),
                ('Model',
                 <class 'lib.model...
          (pyramid-relu-128): LeakyReLU(negative_slope=0.2, inplace=True)
          (pyramid-128-256-convt): Conv1d(128, 256, kernel_size=(16,), stride=(4,), bias=False)
          (pyramid-256-batchnorm): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (pyramid-relu-256): LeakyReLU(negative_slope=0.2, inplace=True)
          (final-256-1-convt): Conv1d(256, 600, kernel_size=(9,), stride=(1,), bias=False)
        )
      )
    )


# GridSearchCV

In [6]:
search_parameters = {
    'Model__module__w_app': list(range(0, 31, 30)),
}

In [8]:
ganomaly_gs = GridSearchCV(model_fft, search_parameters, refit=False, cv=2)

In [9]:
gs = ganomaly_gs.fit(normal_features_train)

  epoch    appearant_loss    discriminator_loss    fraud_loss    generator_loss    latent_loss    train_loss     dur
-------  ----------------  --------------------  ------------  ----------------  -------------  ------------  ------
      1            [36m0.1534[0m                [32m0.4849[0m        [35m0.7519[0m            [31m0.9813[0m         [94m0.2294[0m        [36m1.4661[0m  7.0277
      2            0.1614                [32m0.1830[0m        0.7561            [31m0.7899[0m         [94m0.0338[0m        [36m0.9729[0m  6.8462
      3            0.1628                [32m0.1049[0m        0.7579            [31m0.7791[0m         [94m0.0212[0m        [36m0.8840[0m  6.9363
      4            0.1637                [32m0.0754[0m        0.7585            [31m0.7746[0m         [94m0.0161[0m        [36m0.8500[0m  6.9009
      5            0.1644                [32m0.0598[0m        0.7589            [31m0.7725[0m         [94m0.0136[0m        [36m0.

In [10]:
ganomaly_gs.cv_results_

{'mean_fit_time': array([34.72871351, 34.7830472 ]),
 'std_fit_time': array([0.06977677, 0.13865519]),
 'mean_score_time': array([0.53931069, 0.53955626]),
 'std_score_time': array([2.23875046e-03, 7.15255737e-07]),
 'param_Model__module__w_app': masked_array(data=[0, 30],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'Model__module__w_app': 0}, {'Model__module__w_app': 30}],
 'split0_test_generator_loss': array([-0.77237105, -1.11925662]),
 'split1_test_generator_loss': array([-0.7644034 , -1.10774088]),
 'mean_test_generator_loss': array([-0.76838723, -1.11349875]),
 'std_test_generator_loss': array([0.00398383, 0.00575787]),
 'rank_test_generator_loss': array([1, 2]),
 'split0_test_train_loss': array([-0.83086701, -1.17399347]),
 'split1_test_train_loss': array([-0.81963332, -1.15616045]),
 'mean_test_train_loss': array([-0.82525017, -1.16507696]),
 'std_test_train_loss': array([0.00561685, 0.00891651]),
 'rank_test_train_loss': 

In [None]:
ganomaly_gs.best_score_

In [None]:
make_scorer

## Results


### Timeseries

In [None]:
features_test = np.array(features_test.tolist())
predictions = model_timeseries.predict_proba(features_test)

predictions = pd.DataFrame(predictions)
predictions = predictions.T
predictions = predictions.rename({0: 'anomaly_score', 1: 'X', 2: 'fake', 3: 'latent_in', 4: 'latent_o'}, axis = 1)

columns_flatten = ['X', 'fake', 'latent_in', 'latent_o']
predictions[columns_flatten] = predictions[columns_flatten].applymap(lambda array: array.flatten())


result = labels_test.reset_index(drop=True).join(predictions)
result  = result.reset_index(drop = True)

result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
sns.set_style('darkgrid')


sns.set(rc={'figure.figsize':(10, 6)})
fig, ax = plt.subplots()


selection2 = result[result['index'].isin([20, 6, 160, 0, 1, 2, 3])]
sns.stripplot(data = result, y = 'anomaly_score', x = 'condition', palette = ['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'], alpha = 0.7, jitter = 0.3,  ax = ax, linewidth = .1, size = 7)
ax.set_yscale('log')

selection = result[((result['faultDiameter'] == 0.021) & (result['motorLoad'] == 0) & (result['vibrationOrigin'] == 'fanEndData')) | (result['condition'] == 'Normal Baseline') & (result['vibrationOrigin'] == 'fanEndData')]


fig2, ax2 = plt.subplots()
sns.stripplot(data = selection, y = 'anomaly_score', x = 'condition', hue = 'relativeFaultPosition', palette = ['grey', 'mediumseagreen', 'salmon', 'cornflowerblue'], ax = ax2, alpha = 0.7, jitter = 0.3, linewidth = .1, size = 7)
ax2.set_yscale('log')
ax2.set(ylim=(0.07, None))
ax2.legend(fontsize='large', title_fontsize='30')

In [None]:
lineplot_comparison(result, 'X', 'fake', 'Feature Extraction', 'Index', 'Amplitude')

In [None]:
lineplot_comparison(result, 'latent_in', 'latent_o', 'Latent Space', 'Index', 'Amplitude')

### Stacked Timeseries

In [None]:
features_test = np.array(features_test.tolist())
predictions = model_stacked_ts.predict_proba(features_test)

predictions = pd.DataFrame(predictions)
predictions = predictions.T
predictions = predictions.rename({0: 'anomaly_score', 1: 'X', 2: 'fake', 3: 'latent_in', 4: 'latent_o'}, axis = 1)

columns_flatten = ['X', 'fake', 'latent_in', 'latent_o']
predictions[columns_flatten] = predictions[columns_flatten].applymap(lambda array: array.flatten())


result = labels_test.reset_index(drop=True).join(predictions)
result  = result.reset_index(drop = True)

result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
sns.set_style('darkgrid')


sns.set(rc={'figure.figsize':(10, 6)})
fig, ax = plt.subplots()


selection2 = result[result['index'].isin([20, 6, 160, 0, 1, 2, 3])]
sns.stripplot(data = result, y = 'anomaly_score', x = 'condition', palette = ['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'], alpha = 0.7, jitter = 0.3,  ax = ax, linewidth = .1, size = 7)
ax.set_yscale('log')

selection = result[((result['faultDiameter'] == 0.021) & (result['motorLoad'] == 0) & (result['vibrationOrigin'] == 'fanEndData')) | (result['condition'] == 'Normal Baseline') & (result['vibrationOrigin'] == 'fanEndData')]



fig2, ax2 = plt.subplots()
sns.stripplot(data = selection, y = 'anomaly_score', x = 'condition', hue = 'relativeFaultPosition', palette = ['grey', 'mediumseagreen', 'salmon', 'cornflowerblue'], ax = ax2, alpha = 0.7, jitter = 0.3, linewidth = .1, size = 7)
ax2.set_yscale('log')
ax2.set(ylim=(0.07, None))
ax2.legend(fontsize='large', title_fontsize='30')

In [None]:
lineplot_comparison(result, 'X', 'fake', 'Feature Extraction', 'Index', 'Amplitude')

In [None]:
lineplot_comparison(result, 'latent_in', 'latent_o', 'Latent Space', 'Index', 'Amplitude')

### FFT

In [None]:
features_test = np.array(features_test.tolist())
predictions = model_fft.predict_proba(features_test)

predictions = pd.DataFrame(predictions)
predictions = predictions.T
predictions = predictions.rename({0: 'anomaly_score', 1: 'X', 2: 'fake', 3: 'latent_in', 4: 'latent_o'}, axis = 1)

columns_flatten = ['X', 'fake', 'latent_in', 'latent_o']
predictions[columns_flatten] = predictions[columns_flatten].applymap(lambda array: array.flatten())


result = labels_test.reset_index(drop=True).join(predictions)
result  = result.reset_index(drop = True)

result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
sns.set_style('darkgrid')


sns.set(rc={'figure.figsize':(10, 6)})
fig, ax = plt.subplots()


selection2 = result[result['index'].isin([20, 6, 160, 0, 1, 2, 3])]
sns.stripplot(data = result, y = 'anomaly_score', x = 'condition', palette = ['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'], alpha = 0.7, jitter = 0.3,  ax = ax, linewidth = .1, size = 7)
ax.set_yscale('log')

selection = result[((result['faultDiameter'] == 0.021) & (result['motorLoad'] == 0) & (result['vibrationOrigin'] == 'fanEndData')) | (result['condition'] == 'Normal Baseline') & (result['vibrationOrigin'] == 'fanEndData')]



fig2, ax2 = plt.subplots()
sns.stripplot(data = selection, y = 'anomaly_score', x = 'condition', hue = 'relativeFaultPosition', palette = ['grey', 'mediumseagreen', 'salmon', 'cornflowerblue'], ax = ax2, alpha = 0.7, jitter = 0.3, linewidth = .1, size = 7)
ax2.set_yscale('log')
ax2.set(ylim=(0.07, None))
ax2.legend(fontsize='large', title_fontsize='30')

In [None]:
lineplot_comparison(result, 'X', 'fake', 'Feature Extraction', 'Index', 'Amplitude')

In [None]:
lineplot_comparison(result, 'latent_in', 'latent_o', 'Latent Space', 'Index', 'Amplitude')

### STFT

In [None]:
features_test = np.array(features_test.tolist())
predictions = model_stft.predict_proba(features_test)

predictions = pd.DataFrame(predictions)
predictions = predictions.T
predictions = predictions.rename({0: 'anomaly_score', 1: 'X', 2: 'fake', 3: 'latent_in', 4: 'latent_o'}, axis = 1)

columns_flatten = ['X', 'fake', 'latent_in', 'latent_o']
predictions[columns_flatten] = predictions[columns_flatten].applymap(lambda array: array.flatten())


result = labels_test.reset_index(drop=True).join(predictions)
result  = result.reset_index(drop = True)

result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
sns.set_style('darkgrid')


sns.set(rc={'figure.figsize':(10, 6)})
fig, ax = plt.subplots()


selection2 = result[result['index'].isin([20, 6, 160, 0, 1, 2, 3])]
sns.stripplot(data = result, y = 'anomaly_score', x = 'condition', palette = ['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'], alpha = 0.7, jitter = 0.3,  ax = ax, linewidth = .1, size = 7)
ax.set_yscale('log')

selection = result[((result['faultDiameter'] == 0.021) & (result['motorLoad'] == 0) & (result['vibrationOrigin'] == 'fanEndData')) | (result['condition'] == 'Normal Baseline') & (result['vibrationOrigin'] == 'fanEndData')]



fig2, ax2 = plt.subplots()
sns.stripplot(data = selection, y = 'anomaly_score', x = 'condition', hue = 'relativeFaultPosition', palette = ['grey', 'mediumseagreen', 'salmon', 'cornflowerblue'], ax = ax2, alpha = 0.7, jitter = 0.3, linewidth = .1, size = 7)
ax2.set_yscale('log')
ax2.set(ylim=(0.07, None))
ax2.legend(fontsize='large', title_fontsize='30')

### Feature Extraction

In [None]:
features_test = np.array(features_test.tolist())
predictions = model_fe.predict_proba(features_test)

predictions = pd.DataFrame(predictions)
predictions = predictions.T
predictions = predictions.rename({0: 'anomaly_score', 1: 'X', 2: 'fake', 3: 'latent_in', 4: 'latent_o'}, axis = 1)

columns_flatten = ['X', 'fake', 'latent_in', 'latent_o']
predictions[columns_flatten] = predictions[columns_flatten].applymap(lambda array: array.flatten())


result = labels_test.reset_index(drop=True).join(predictions)
result  = result.reset_index(drop = True)

result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
sns.set_style('darkgrid')


sns.set(rc={'figure.figsize':(10, 6)})
fig, ax = plt.subplots()


selection2 = result[result['index'].isin([20, 6, 160, 0, 1, 2, 3])]
sns.stripplot(data = result, y = 'anomaly_score', x = 'condition', palette = ['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'], alpha = 0.7, jitter = 0.3,  ax = ax, linewidth = .1, size = 7)
ax.set_yscale('log')

selection = result[((result['faultDiameter'] == 0.021) & (result['motorLoad'] == 0) & (result['vibrationOrigin'] == 'fanEndData')) | (result['condition'] == 'Normal Baseline') & (result['vibrationOrigin'] == 'fanEndData')]



fig2, ax2 = plt.subplots()
sns.stripplot(data = selection, y = 'anomaly_score', x = 'condition', hue = 'relativeFaultPosition', palette = ['grey', 'mediumseagreen', 'salmon', 'cornflowerblue'], ax = ax2, alpha = 0.7, jitter = 0.3, linewidth = .1, size = 7)
ax2.set_yscale('log')
ax2.set(ylim=(0.07, None))
ax2.legend(fontsize='large', title_fontsize='30')