# CWRU Experiments
## Imports

In [None]:
import pandas as pd
import numpy as np

import lib.transformers as tfs
from sklearn.pipeline import Pipeline

from lib.models import Ganomaly1d, Ganomaly2d, GanomalyFE, GanomalyNet
from lib.visualization import GANomalyBoard, rename_tensorboard_key

from skorch.callbacks import PassthroughScoring, ProgressBar
import torch

from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split

from keras.datasets import mnist
import tensorflow


from lib.others import create_dataset

from lib.others import build_model
from lib.visualization import lineplot_comparison

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the two CWRU parquet files and concatenate them into one frame.
cwruData0, cwruData1 = (
    pd.read_parquet(parquet_path)
    for parquet_path in ('data/cwru0.parquet', 'data/cwru1.parquet')
)
cwruData = pd.concat([cwruData0, cwruData1])

## Settings

In [None]:
# torch.manual_seed(0)
# torch.cuda.manual_seed(0)
# np.random.seed(0)

## Creating Datasets

In [None]:
# Label columns requested for every dataset created in this cell.
label_columns = ('condition', 'faultDiameter', 'motorLoad', 'relativeFaultPosition', 'faultyBearingPosition')


def _fan_end_fault_dataset(fault_condition):
    """Return the (features, labels) pair of the 'fanEndData' column for one fault condition."""
    return create_dataset(
        cwruData,
        feature_columns=['fanEndData'],
        label_columns=list(label_columns),
        condition=[fault_condition],
        sampleRate=[12000],
    )


# Normal baseline: built from both the fan-end and the drive-end column.
# NOTE(review): the fault sets below only use 'fanEndData' - confirm that
# including 'driveEndData' for the normal data is intended.
normal_features, normal_labels = create_dataset(
    cwruData,
    feature_columns=['fanEndData', 'driveEndData'],
    label_columns=list(label_columns),
    condition=['Normal Baseline'],
    sampleRate=[12000],
)
(
    normal_features_train,
    normal_features_test,
    normal_labels_train,
    normal_labels_test,
) = train_test_split(normal_features, normal_labels, train_size=400, test_size=451, random_state=0)

ball_features, ball_labels = _fan_end_fault_dataset('Ball Fault')
inner_features, inner_labels = _fan_end_fault_dataset('Inner Race Fault')
outer_features, outer_labels = _fan_end_fault_dataset('Outer Race Fault')

# Test set: every fault sample plus the held-out normal samples; labels and
# features are concatenated in the same order.
labels_test = pd.concat([ball_labels, inner_labels, outer_labels, normal_labels_test])
features_test = pd.concat([ball_features, inner_features, outer_features, normal_features_test])

# Convert the normal splits to arrays only after the concat above, which
# still needs them as pandas objects.
normal_features_test = np.array(normal_features_test.to_list())
normal_features_train = np.array(normal_features_train.to_list())

## Building and training the different Models

In [None]:
# model settings
n_z, n_channels, n_feature_maps = 600, 1, 64

# weight settings
adversarial_weight = contextual_weight = 1
encoder_weight = 70
lambda_weight = 1 / 70

# training settings
# Use the first CUDA device when one is available, otherwise the CPU.
device = 'cpu' if not torch.cuda.is_available() else torch.device("cuda:0")
n_gpus = 0
workers = 2
batch_size = 16
max_epochs = 1
lr = 0.0001
beta1, beta2 = 0.5, 0.999

# extra callbacks
callbacks = []

# run number (appended to each model's logging suffix)
run = 0
verbose = 1

### Time Series

In [None]:
# Build the Ganomaly1d model used for the raw time-series representation.
gan_time_series = build_model(
    model=Ganomaly1d,

    # model parameters
    input_size=3136,
    n_z=n_z,
    n_channels=n_channels,
    n_fm_discriminator=n_feature_maps,
    n_fm_generator=n_feature_maps,
    adversarial_weight=adversarial_weight,
    contextual_weight=contextual_weight,
    encoder_weight=encoder_weight,
    lambda_weight=lambda_weight,

    # training parameters
    device=device,
    n_gpus=n_gpus,
    workers=workers,
    batch_size=batch_size,
    max_epochs=max_epochs,
    # lr/beta1/beta2 come from the shared training settings (same values as
    # the literals previously hard-coded here), so changing the settings
    # affects this model like the other models in the notebook.
    lr=lr,
    beta1=beta1,
    beta2=beta2,

    # logging parameters
    suffix='timeseries' + str(run),
    plot_type='lineplot',
    plot_shape=3136,
    plot_latent_shape=600,
    n_samples=4,

    # extra callbacks
    callbacks=callbacks,
    verbose=verbose,
)

In [None]:
# Preprocessing + model: reshape to (1, 3136), cast to float32, then the GAN.
pipeline_time_series = Pipeline(
    steps=[
        ('reshaper', tfs.ArrayReshaper((1, 3136))),
        ('retyper', tfs.ArrayRetyper(np.float32)),
        ('model', gan_time_series),
    ]
)

In [None]:
# Fit the time-series pipeline on the normal training split only.
pipeline_time_series.fit(normal_features_train);

### Stacked Time Series

In [None]:
# Build the Ganomaly2d model used for the stacked (56 x 56) time-series representation.
gan_stacked_time_series = build_model(
    model=Ganomaly2d,

    # model parameters
    input_size=56,
    n_z=n_z,
    n_channels=n_channels,
    n_fm_discriminator=n_feature_maps,
    n_fm_generator=n_feature_maps,
    adversarial_weight=adversarial_weight,
    contextual_weight=contextual_weight,
    encoder_weight=encoder_weight,
    lambda_weight=lambda_weight,

    # training parameters
    device=device,
    n_gpus=n_gpus,
    workers=workers,
    batch_size=batch_size,
    max_epochs=max_epochs,
    # lr/beta1/beta2 come from the shared training settings (same values as
    # the literals previously hard-coded here), so changing the settings
    # affects this model like the other models in the notebook.
    lr=lr,
    beta1=beta1,
    beta2=beta2,

    # logging parameters
    suffix='stacked_timeseries' + str(run),
    plot_type='lineplot',
    plot_shape=3136,
    plot_latent_shape=600,
    n_samples=4,

    # extra callbacks
    callbacks=callbacks,
    verbose=verbose,
)

In [None]:
# Preprocessing + model: reshape to (1, 56, 56), cast to float32, then the GAN.
pipeline_stacked_time_series = Pipeline(
    steps=[
        ('reshaper', tfs.ArrayReshaper((1, 56, 56))),
        ('retyper', tfs.ArrayRetyper(np.float32)),
        ('model', gan_stacked_time_series),
    ]
)

In [None]:
# Fit the stacked time-series pipeline on the normal training split only.
pipeline_stacked_time_series.fit(normal_features_train);

### Frequency Spectrum

In [None]:
# Build the Ganomaly1d model used for the frequency-spectrum representation.
gan_frequency_spectrum = build_model(
    model=Ganomaly1d,

    # model parameters
    input_size=1568,
    n_z=n_z,
    n_channels=n_channels,
    n_fm_discriminator=n_feature_maps,
    n_fm_generator=n_feature_maps,
    adversarial_weight=adversarial_weight,
    contextual_weight=contextual_weight,
    encoder_weight=encoder_weight,
    lambda_weight=lambda_weight,

    # training parameters
    device=device,
    n_gpus=n_gpus,
    workers=workers,
    batch_size=batch_size,
    max_epochs=max_epochs,
    lr=lr,
    beta1=beta1,
    beta2=beta2,

    # logging parameters
    suffix='frequency_spectrum' + str(run),
    plot_type='lineplot',
    plot_shape=1568,
    plot_latent_shape=600,
    n_samples=4,

    # extra callbacks
    callbacks=callbacks,
    verbose=verbose,
)

In [None]:
# Preprocessing + model: FFT, reshape to (1, 1568), cast to float32, then the GAN.
pipeline_frequency_spectrum = Pipeline(
    steps=[
        ('fourier_transform', tfs.ArrayFFT()),
        ('reshaper', tfs.ArrayReshaper((1, 1568))),
        ('retyper', tfs.ArrayRetyper(np.float32)),
        ('model', gan_frequency_spectrum),
    ]
)

In [None]:
# Fit the frequency-spectrum pipeline on the normal training split only.
pipeline_frequency_spectrum.fit(normal_features_train);

### Spectrogram

In [None]:
# Build the Ganomaly2d model used for the spectrogram representation.
gan_spectrogram = build_model(
    model=Ganomaly2d,

    # model parameters
    input_size=56,
    n_z=n_z,
    n_channels=n_channels,
    n_fm_discriminator=n_feature_maps,
    n_fm_generator=n_feature_maps,
    adversarial_weight=adversarial_weight,
    contextual_weight=contextual_weight,
    encoder_weight=encoder_weight,
    lambda_weight=lambda_weight,

    # training parameters
    device=device,
    n_gpus=n_gpus,
    workers=workers,
    batch_size=batch_size,
    max_epochs=max_epochs,
    lr=lr,
    beta1=beta1,
    beta2=beta2,

    # logging parameters
    suffix='spectrograms' + str(run),
    plot_type='image',
    plot_shape=56,
    plot_latent_shape=600,
    n_samples=36,

    # extra callbacks
    callbacks=callbacks,
    verbose=verbose,
)

In [None]:
# Preprocessing + model: STFT, reshape to (1, 56, 56), cast to float32, then the GAN.
pipeline_spectrogram = Pipeline(
    steps=[
        ('stft_transform', tfs.ArraySTFT()),
        ('reshaper', tfs.ArrayReshaper((1, 56, 56))),
        ('retyper', tfs.ArrayRetyper(np.float32)),
        ('model', gan_spectrogram),
    ]
)

In [None]:
# Fit the spectrogram pipeline on the normal training split only.
pipeline_spectrogram.fit(normal_features_train);

### Feature Extraction

In [None]:
# Build the GanomalyFE model used for the extracted-feature representation.
gan_feature_extraction = build_model(
    model=GanomalyFE,

    # model parameters
    input_size=4,
    adversarial_weight=adversarial_weight,
    contextual_weight=contextual_weight,
    encoder_weight=encoder_weight,
    lambda_weight=lambda_weight,

    # training parameters
    device=device,
    n_gpus=n_gpus,
    workers=workers,
    batch_size=batch_size,
    max_epochs=max_epochs,
    lr=lr,
    beta1=beta1,
    beta2=beta2,

    # logging parameters
    suffix='feature_extraction' + str(run),
    plot_type='barplot',
    plot_shape=16,
    plot_latent_shape=32,
    n_samples=4,

    # extra callbacks
    callbacks=callbacks,
    verbose=verbose,
)

In [None]:
# Preprocessing + model: feature extraction, reshape to (1, 4, 4), cast to float32, then the GAN.
pipeline_feature_extraction = Pipeline(
    steps=[
        ('feature_extractor', tfs.FeatureExtractor()),
        ('reshaper', tfs.ArrayReshaper((1, 4, 4))),
        ('retyper', tfs.ArrayRetyper(np.float32)),
        ('model', gan_feature_extraction),
    ]
)

In [None]:
# Fit the feature-extraction pipeline on the normal training split only.
pipeline_feature_extraction.fit(normal_features_train);

## Results


### Time Series

In [None]:
# Score the complete test set with the time-series pipeline.
# .tolist() exists on both pandas Series and ndarrays, so this line also works
# when features_test was already converted by another results cell.
features_test = np.array(features_test.tolist())
predictions = pipeline_time_series.predict_proba(features_test)

# Transpose the raw output and name its five columns.
# presumably one row per test sample after the transpose - verify against predict_proba
predictions = pd.DataFrame(predictions).T
predictions = predictions.rename(
    {0: 'anomaly_score', 1: 'Input', 2: 'Input Reconstruction', 3: 'Latent Input', 4: 'Latent Reconstruction'},
    axis=1,
)

# Flatten the array stored in each cell, column by column. Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
columns_flatten = ['Input', 'Input Reconstruction', 'Latent Input', 'Latent Reconstruction']
for column in columns_flatten:
    predictions[column] = predictions[column].map(lambda array: array.flatten())

# Labels are re-indexed from 0 so the index-based join pairs them with the
# prediction rows by position.
result = labels_test.reset_index(drop=True).join(predictions)
result = result.reset_index(drop=True)

# Replace missing label values with explicit placeholders.
result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
# Strip plot of the anomaly score per condition, annotated with max/mean/min.
sns.set(rc={'figure.figsize': (12, 6)}, style='darkgrid')
fig, ax = plt.subplots()

jitter = 0.3
offset = 0.05

sns.stripplot(
    data=result,
    y='anomaly_score',
    x='condition',
    palette=['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'],
    alpha=0.7,
    jitter=jitter,
    ax=ax,
    linewidth=.1,
    size=7,
)
ax.set_yscale('log')

# Write each condition's max, mean and min score at x = index + jitter + offset.
for position, condition in enumerate(result['condition'].unique()):
    scores = result.loc[result['condition'] == condition, 'anomaly_score'].astype(np.float32)
    label_x = position + jitter + offset
    for statistic in (scores.max(), scores.mean(), scores.min()):
        ax.text(label_x, statistic, statistic.round(3))

# Extend the right x-limit past the labels of the last condition.
ax.set_xlim(None, label_x + 0.3)

ax.set_ylabel('Anomaly Score')
ax.set_xlabel('Condition')
ax.set_title('Anomaly Score Time Series')

fig.savefig('data/results/anomaly-score_time-series.png', dpi=330, bbox_inches='tight')

In [None]:
# Compare 'Input' with 'Input Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Input',
    'Input Reconstruction',
    'Input Comparison Time Series',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/input-reconstruction_time-series.png',
    dpi=330,
    bbox_inches='tight',
)

In [None]:
# Compare 'Latent Input' with 'Latent Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Latent Input',
    'Latent Reconstruction',
    'Latent Comparison Time Series',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/latent-reconstruction_time-series.png',
    dpi=330,
    bbox_inches='tight',
)

### Stacked Time Series

In [None]:
# Score the complete test set with the stacked time-series pipeline.
# .tolist() exists on both pandas Series and ndarrays, so this line also works
# when features_test was already converted by another results cell.
features_test = np.array(features_test.tolist())
predictions = pipeline_stacked_time_series.predict_proba(features_test)

# Transpose the raw output and name its five columns.
# presumably one row per test sample after the transpose - verify against predict_proba
predictions = pd.DataFrame(predictions).T
predictions = predictions.rename(
    {0: 'anomaly_score', 1: 'Input', 2: 'Input Reconstruction', 3: 'Latent Input', 4: 'Latent Reconstruction'},
    axis=1,
)

# Flatten the array stored in each cell, column by column. Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
columns_flatten = ['Input', 'Input Reconstruction', 'Latent Input', 'Latent Reconstruction']
for column in columns_flatten:
    predictions[column] = predictions[column].map(lambda array: array.flatten())

# Labels are re-indexed from 0 so the index-based join pairs them with the
# prediction rows by position.
result = labels_test.reset_index(drop=True).join(predictions)
result = result.reset_index(drop=True)

# Replace missing label values with explicit placeholders.
result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
# Strip plot of the anomaly score per condition, annotated with max/mean/min.
sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (12, 6)})
fig, ax = plt.subplots()

jitter = 0.3
offset = 0.05

sns.stripplot(
    data=result,
    y='anomaly_score',
    x='condition',
    palette=['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'],
    alpha=0.7,
    jitter=jitter,
    ax=ax,
    linewidth=.1,
    size=7,
)
ax.set_yscale('log')

# Write each condition's max, mean and min score at x = index + jitter + offset.
for position, condition in enumerate(result['condition'].unique()):
    scores = result.loc[result['condition'] == condition, 'anomaly_score'].astype(np.float32)
    label_x = position + jitter + offset
    for statistic in (scores.max(), scores.mean(), scores.min()):
        ax.text(label_x, statistic, statistic.round(3))

# Extend the right x-limit past the labels of the last condition.
ax.set_xlim(None, label_x + 0.3)

ax.set_ylabel('Anomaly Score')
ax.set_xlabel('Condition')
ax.set_title('Anomaly Score Stacked Time Series')

fig.savefig('data/results/anomaly-score_stacked-time-series.png', dpi=330, bbox_inches='tight')

In [None]:
# Compare 'Input' with 'Input Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Input',
    'Input Reconstruction',
    'Input Comparison Stacked Time Series',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/input-reconstruction_stacked-time-series.png',
    dpi=330,
    bbox_inches='tight',
)

In [None]:
# Compare 'Latent Input' with 'Latent Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Latent Input',
    'Latent Reconstruction',
    'Latent Comparison Stacked Time Series',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/latent-reconstruction_stacked-time-series.png',
    dpi=330,
    bbox_inches='tight',
)

### Frequency Spectrum

In [None]:
# Score the complete test set with the frequency-spectrum pipeline.
# .tolist() exists on both pandas Series and ndarrays, so this line also works
# when features_test was already converted by another results cell.
features_test = np.array(features_test.tolist())
predictions = pipeline_frequency_spectrum.predict_proba(features_test)

# Transpose the raw output and name its five columns.
# presumably one row per test sample after the transpose - verify against predict_proba
predictions = pd.DataFrame(predictions).T
predictions = predictions.rename(
    {0: 'anomaly_score', 1: 'Input', 2: 'Input Reconstruction', 3: 'Latent Input', 4: 'Latent Reconstruction'},
    axis=1,
)

# Flatten the array stored in each cell, column by column. Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
columns_flatten = ['Input', 'Input Reconstruction', 'Latent Input', 'Latent Reconstruction']
for column in columns_flatten:
    predictions[column] = predictions[column].map(lambda array: array.flatten())

# Labels are re-indexed from 0 so the index-based join pairs them with the
# prediction rows by position.
result = labels_test.reset_index(drop=True).join(predictions)
result = result.reset_index(drop=True)

# Replace missing label values with explicit placeholders.
result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
# Strip plot of the anomaly score per condition, annotated with max/mean/min.
sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (12, 6)})
fig, ax = plt.subplots()

jitter = 0.3
offset = 0.05

sns.stripplot(
    data=result,
    y='anomaly_score',
    x='condition',
    palette=['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'],
    alpha=0.7,
    jitter=jitter,
    ax=ax,
    linewidth=.1,
    size=7,
)
ax.set_yscale('log')

# Write each condition's max, mean and min score at x = index + jitter + offset.
for position, condition in enumerate(result['condition'].unique()):
    scores = result.loc[result['condition'] == condition, 'anomaly_score'].astype(np.float32)
    label_x = position + jitter + offset
    for statistic in (scores.max(), scores.mean(), scores.min()):
        ax.text(label_x, statistic, statistic.round(3))

# Extend the right x-limit past the labels of the last condition.
ax.set_xlim(None, label_x + 0.3)

ax.set_ylabel('Anomaly Score')
ax.set_xlabel('Condition')
ax.set_title('Anomaly Score Frequency Spectrum')

fig.savefig('data/results/anomaly-score_frequency-spectrum.png', dpi=330, bbox_inches='tight')

In [None]:
# Compare 'Input' with 'Input Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Input',
    'Input Reconstruction',
    'Input Comparison Frequency Spectrum',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/input-reconstruction_frequency-spectrum.png',
    dpi=330,
    bbox_inches='tight',
)

In [None]:
# Compare 'Latent Input' with 'Latent Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Latent Input',
    'Latent Reconstruction',
    'Latent Comparison Frequency Spectrum',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/latent-reconstruction_frequency-spectrum.png',
    dpi=330,
    bbox_inches='tight',
)

### Spectrogram

In [None]:
# Score the complete test set with the spectrogram pipeline.
# .tolist() exists on both pandas Series and ndarrays, so this line also works
# when features_test was already converted by another results cell.
features_test = np.array(features_test.tolist())
predictions = pipeline_spectrogram.predict_proba(features_test)

# Transpose the raw output and name its five columns.
# presumably one row per test sample after the transpose - verify against predict_proba
predictions = pd.DataFrame(predictions).T
predictions = predictions.rename(
    {0: 'anomaly_score', 1: 'Input', 2: 'Input Reconstruction', 3: 'Latent Input', 4: 'Latent Reconstruction'},
    axis=1,
)

# Flatten the array stored in each cell, column by column. Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
columns_flatten = ['Input', 'Input Reconstruction', 'Latent Input', 'Latent Reconstruction']
for column in columns_flatten:
    predictions[column] = predictions[column].map(lambda array: array.flatten())

# Labels are re-indexed from 0 so the index-based join pairs them with the
# prediction rows by position.
result = labels_test.reset_index(drop=True).join(predictions)
result = result.reset_index(drop=True)

# Replace missing label values with explicit placeholders.
result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
# Strip plot of the anomaly score per condition, annotated with max/mean/min.
sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (12, 6)})
fig, ax = plt.subplots()

jitter = 0.3
offset = 0.05

sns.stripplot(
    data=result,
    y='anomaly_score',
    x='condition',
    palette=['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'],
    alpha=0.7,
    jitter=jitter,
    ax=ax,
    linewidth=.1,
    size=7,
)
ax.set_yscale('log')

# Write each condition's max, mean and min score at x = index + jitter + offset.
for position, condition in enumerate(result['condition'].unique()):
    scores = result.loc[result['condition'] == condition, 'anomaly_score'].astype(np.float32)
    label_x = position + jitter + offset
    for statistic in (scores.max(), scores.mean(), scores.min()):
        ax.text(label_x, statistic, statistic.round(3))

# Extend the right x-limit past the labels of the last condition.
ax.set_xlim(None, label_x + 0.3)

ax.set_ylabel('Anomaly Score')
ax.set_xlabel('Condition')
ax.set_title('Anomaly Score Spectrogram')

fig.savefig('data/results/anomaly-score_spectrogram.png', dpi=330, bbox_inches='tight')

In [None]:
# Compare 'Input' with 'Input Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Input',
    'Input Reconstruction',
    'Input Comparison Spectrogram',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/input-reconstruction_spectrogram.png',
    dpi=330,
    bbox_inches='tight',
)

In [None]:
# Compare 'Latent Input' with 'Latent Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Latent Input',
    'Latent Reconstruction',
    'Latent Comparison Spectrogram',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/latent-reconstruction_spectrogram.png',
    dpi=330,
    bbox_inches='tight',
)

### Feature Extraction

In [None]:
# Score the complete test set with the feature-extraction pipeline.
# .tolist() exists on both pandas Series and ndarrays, so this line also works
# when features_test was already converted by another results cell.
features_test = np.array(features_test.tolist())
predictions = pipeline_feature_extraction.predict_proba(features_test)

# Transpose the raw output and name its five columns.
# presumably one row per test sample after the transpose - verify against predict_proba
predictions = pd.DataFrame(predictions).T
predictions = predictions.rename(
    {0: 'anomaly_score', 1: 'Input', 2: 'Input Reconstruction', 3: 'Latent Input', 4: 'Latent Reconstruction'},
    axis=1,
)

# Flatten the array stored in each cell, column by column. Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
columns_flatten = ['Input', 'Input Reconstruction', 'Latent Input', 'Latent Reconstruction']
for column in columns_flatten:
    predictions[column] = predictions[column].map(lambda array: array.flatten())

# Labels are re-indexed from 0 so the index-based join pairs them with the
# prediction rows by position.
result = labels_test.reset_index(drop=True).join(predictions)
result = result.reset_index(drop=True)

# Replace missing label values with explicit placeholders.
result['relativeFaultPosition'] = result['relativeFaultPosition'].fillna('not available')
result['faultDiameter'] = result['faultDiameter'].fillna(-1)

In [None]:
# Strip plot of the anomaly score per condition, annotated with max/mean/min.
sns.set_style('darkgrid')
sns.set(rc={'figure.figsize': (12, 6)})
fig, ax = plt.subplots()

jitter = 0.3
offset = 0.05

sns.stripplot(
    data=result,
    y='anomaly_score',
    x='condition',
    palette=['mediumseagreen', 'lightsalmon', 'cornflowerblue', 'lightcoral'],
    alpha=0.7,
    jitter=jitter,
    ax=ax,
    linewidth=.1,
    size=7,
)
ax.set_yscale('log')

# Write each condition's max, mean and min score at x = index + jitter + offset.
for position, condition in enumerate(result['condition'].unique()):
    scores = result.loc[result['condition'] == condition, 'anomaly_score'].astype(np.float32)
    label_x = position + jitter + offset
    for statistic in (scores.max(), scores.mean(), scores.min()):
        ax.text(label_x, statistic, statistic.round(3))

# Extend the right x-limit past the labels of the last condition.
ax.set_xlim(None, label_x + 0.3)

ax.set_ylabel('Anomaly Score')
ax.set_xlabel('Condition')
ax.set_title('Anomaly Score Feature Extraction')

fig.savefig('data/results/anomaly-score_feature-extraction.png', dpi=330, bbox_inches='tight')

In [None]:
# Compare 'Input' with 'Input Reconstruction' and save the returned object via savefig.
# The title follows the 'Input Comparison <representation>' pattern used by the
# other input-comparison cells in this notebook (it was 'Feature-Extraction').
comparison = lineplot_comparison(
    result,
    'Input',
    'Input Reconstruction',
    'Input Comparison Feature Extraction',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/input-reconstruction_feature-extraction.png',
    dpi=330,
    bbox_inches='tight',
)

In [None]:
# Compare 'Latent Input' with 'Latent Reconstruction' and save the returned object via savefig.
comparison = lineplot_comparison(
    result,
    'Latent Input',
    'Latent Reconstruction',
    'Latent Comparison Feature Extraction',
    'Index',
    'Amplitude',
)
comparison.savefig(
    'data/results/latent-reconstruction_feature-extraction.png',
    dpi=330,
    bbox_inches='tight',
)