# **Imports**

## Libraries

In [None]:
import os

from scipy.fft import fft, fftfreq
from scipy import signal
import scipy.io

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms


import json
import pandas as pd
import numpy as np


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelBinarizer
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [None]:
# Pick the compute device once; later cells move the model and tensors onto it.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("GPU Runtime Detected" if device.type == "cuda" else "No GPU Found - CPU Runtime")

## Data

### Faulty

In [None]:
# Set base_dir to the parent folder of the dataset. The cells below read the
# '12DriveEndFault/' and 'NormalBaseline/' sub-folders from it; paths are built by
# plain string concatenation, so base_dir must end with a path separator.
base_dir = ''
os.listdir(base_dir)

In [None]:
# Folder with the fault recordings; each entry is used as a load sub-folder below.
DE_12_dir = base_dir + '12DriveEndFault/'
# os.listdir returns entries in arbitrary order; sorting keeps the row order of the
# assembled DataFrame (and therefore any seeded split of it) reproducible.
loads = sorted(os.listdir(DE_12_dir))
loads

In [None]:
# Report, for every load folder, how many files it holds and their names.
for load in loads:
  load_dir = base_dir + '12DriveEndFault/' + load + '/'
  files = os.listdir(load_dir)
  print(f"{load} :  {len(files)}  -  {files}")

In [None]:
def signal_divider(signal, window_len):
    """Split a signal into consecutive, non-overlapping windows.

    Parameters
    ----------
    signal : array-like
        Samples ordered along the first axis, e.g. a flat sequence or an
        ``(N, 1)`` column vector.
    window_len : int
        Number of samples per window; must be positive.

    Returns
    -------
    pandas.DataFrame
        One row per complete window and ``window_len`` columns. Trailing samples
        that do not fill a whole window are dropped, so an empty or too-short
        signal yields a frame with zero rows.

    Raises
    ------
    ValueError
        If ``window_len`` is not positive (the loop-based version never
        terminated in that case).
    """
    if window_len <= 0:
        raise ValueError("window_len must be a positive integer")

    samples = np.array(signal)

    # Keep only the samples that fill whole windows, then fold them into rows.
    n_windows = len(samples) // window_len
    complete = samples[: n_windows * window_len]

    return pd.DataFrame(complete.reshape(n_windows, window_len))

In [None]:
# Load every fault recording and keep its '<code>_DE_time' array,
# keyed by '<load>-<file name without extension>'.
i = 0
DE_dict = {}
for load in loads:
  # Sorted so the insertion order of DE_dict does not depend on the file system.
  for file in sorted(os.listdir(base_dir + '12DriveEndFault/' + load + '/')):

    # Drop the 4-character extension; loadmat appends '.mat' on its own.
    file = file[:-4]
    mat_data = scipy.io.loadmat(base_dir + '12DriveEndFault/' + load + '/' + file)

    # Reset per file: otherwise a file without any 'X...' variable would silently
    # reuse the code of the previous file (or raise NameError on the first one).
    code = None
    for key in mat_data:
      if key.startswith("X"):
        code = key[:4]

    if code is not None and code + '_DE_time' in mat_data:
      DE_dict[load + '-' + file] = mat_data[code + '_DE_time']

    print(i, ':  - ' , load + '-' + file , mat_data.keys())
    i = i + 1

In [None]:
# Number of samples per window passed to signal_divider; each window becomes one row.
window_len = 2048

In [None]:
# Cut every fault recording into fixed-length windows and tag each window with the
# load / severity / state parsed from its '<load>-<sev>-<state>' key.
dfs = []
for key, recording in DE_dict.items():
  load, sev, state = key.split('-')
  print(key, '\n')
  print('Load: ', load, ' - Severity: ', sev, ' - State: ', state, 'Len: ', np.shape(recording), '\n\n')
  temp_df = signal_divider(recording, window_len).assign(load = load, sev = sev, state = state)
  dfs.append(temp_df)

In [None]:
# Stack the per-recording window frames into one table with a fresh 0..n-1 index.
DE_faulty_time_df = pd.concat(dfs).reset_index(drop = True)
DE_faulty_time_df

### Normal

In [None]:
# Folder with the normal-baseline recordings; each entry is used as a load sub-folder below.
normal_dir = base_dir + 'NormalBaseline/'
# os.listdir returns entries in arbitrary order; sorting keeps the row order of the
# assembled DataFrame (and therefore any seeded split of it) reproducible.
loads = sorted(os.listdir(normal_dir))
loads

In [None]:
# Report, for every normal-baseline load folder, how many files it holds and their names.
for load in loads:
  load_dir = normal_dir + load + '/'
  files = os.listdir(load_dir)
  print(f"{load} :  {len(files)}  -  {files}")

In [None]:
# Load every normal-baseline recording and keep its '<code>_DE_time' array,
# keyed by '<load>-<file name without extension>'.
i = 0
DE_dict = {}
for load in loads:
  # Sorted so the insertion order of DE_dict does not depend on the file system.
  for file in sorted(os.listdir(normal_dir + load + '/')):

    # Drop the 4-character extension; loadmat appends '.mat' on its own.
    file = file[:-4]
    mat_data = scipy.io.loadmat(normal_dir + load + '/' + file)

    # Reset per file: otherwise a file without any 'X...' variable would silently
    # reuse the code of the previous file (or raise NameError on the first one).
    code = None
    for key in mat_data:
      if key.startswith("X"):
        code = key[:4]

    if code is not None and code + '_DE_time' in mat_data:
      DE_dict[load + '-' + file] = mat_data[code + '_DE_time']

    print(i, ':  - ' , load + '-' + file , mat_data.keys())
    i = i + 1

In [None]:
# Cut every normal-baseline recording into fixed-length windows and tag each window
# with the load / state parsed from its '<load>-<state>' key.
dfs = []
for key, recording in DE_dict.items():
  load, state = key.split('-')
  print(key, '\n')
  print('Load: ', load, ' - State: ', state, 'Len: ', np.shape(recording), '\n\n')
  temp_df = signal_divider(recording, window_len).assign(load = load, state = state)
  dfs.append(temp_df)

In [None]:
# Stack the per-recording window frames into one table with a fresh 0..n-1 index.
DE_normal_time_df = pd.concat(dfs).reset_index(drop = True)
DE_normal_time_df

In [None]:
# Normal-baseline keys carry no severity part, so add the column with a constant 0
# to line up with the faulty frame before the two are concatenated.
# NOTE(review): faulty 'sev' values are strings parsed from file names, so the
# combined column ends up with mixed int/str values - confirm this is intended.
DE_normal_time_df['sev'] = 0

In [None]:
# Combine faulty and normal windows into the single table used by all later cells.
DE_time_df = pd.concat([DE_faulty_time_df, DE_normal_time_df]).reset_index(drop = True)
DE_time_df

In [None]:
def ff_transformer(time_signal, freq_filter_flag = False, window_flag = False, *,
                   fs = 3200, band = (2.5, 1500), filter_order = 25):
    """Return the single-sided FFT amplitude spectrum of a 1-D time signal.

    Parameters
    ----------
    time_signal : array-like
        Samples of the signal.
    freq_filter_flag : bool
        If truthy, band-pass filter the signal (Butterworth, second-order
        sections) before the FFT.
    window_flag : bool
        If truthy, multiply the signal by a Hann window before the FFT.
    fs : float, keyword-only
        Sampling frequency handed to the filter design (default 3200).
    band : (low, high), keyword-only
        Pass-band edges in the same units as ``fs`` (default ``(2.5, 1500)``).
    filter_order : int, keyword-only
        Order handed to ``signal.butter`` (default 25).

    Returns
    -------
    numpy.ndarray
        The first ``N // 2`` FFT magnitudes scaled by ``2 / N``, where
        ``N = len(time_signal)``.
    """
    samples = np.asarray(time_signal)
    N = len(samples)

    # Plain truthiness instead of `is True`, so flags such as numpy.bool_(True)
    # are honoured rather than silently ignored.
    if freq_filter_flag:
        band_pass_filter = signal.butter(filter_order, list(band), 'bandpass', fs=fs, output='sos')
        samples = signal.sosfilt(band_pass_filter, samples)

    if window_flag:
        samples = samples * signal.windows.hann(N)

    return 2.0/N * np.abs(fft(samples)[0:N//2])

In [None]:
def df_ff_transformer(time_df):
    """Build a frequency-domain DataFrame from a DataFrame of time signals.

    Every row of ``time_df`` is passed to ``ff_transformer`` with both the
    band-pass filter and the window switched on; the resulting spectra become
    the rows of the returned frame, in the same order, under a default index.
    """
    spectra = [
        ff_transformer(row, freq_filter_flag = True, window_flag = True)
        for _, row in time_df.iterrows()
    ]
    return pd.DataFrame(spectra)

# **Data Preprocessing and Preparation**


In [None]:
# Show how many windows each raw state label has.
DE_time_df['state'].value_counts()

## Aggregating All OuterRace Variants into a Single Class

In this step, all variants of the **OuterRace** fault, namely *OuterRace3*, *OuterRace6* and *OuterRace12*, are aggregated into a single **OuterRace** health state.

In [None]:
# Collapse every state whose name starts with 'O' into the single label 'OuterRace'.
# One .loc assignment replaces the per-row chained writes `df['state'][i] = ...`,
# which pandas may apply to a temporary copy (and never applies under copy-on-write).
outer_race_rows = DE_time_df['state'].str.startswith('O', na = False)
DE_time_df.loc[outer_race_rows, 'state'] = 'OuterRace'

In [None]:
# Store the state labels as a pandas categorical and show the per-class counts.
DE_time_df['state'] = DE_time_df['state'].astype('category')
DE_time_df['state'].value_counts()

## Encoding State to Numerical Value

In [None]:
# Map each state name to an integer class id stored in 'state_encoded'.
# The fitted encoder is kept so predictions can be decoded back to names later.
from sklearn.preprocessing import LabelEncoder
labelencoder= LabelEncoder()
DE_time_df['state_encoded'] = labelencoder.fit_transform(DE_time_df['state'])

In [None]:
# Show how many windows each encoded class has.
DE_time_df['state_encoded'].value_counts()

## Train/Test Splitting

In [None]:
# Hold out 30% of the windows for testing; the fixed seed makes the split repeatable.
df_train, df_test = train_test_split(DE_time_df, test_size=0.3, random_state=42)

In [None]:
# Display the held-out test table.
df_test

In [None]:
# Export a small test subset: 5 randomly drawn windows per (state, load, sev) group.
exporting_dir = '/content/drive/My Drive/brand_new_CWRU/basic_classification/12DE_fault/exported_models/'
subsampled_test_df = df_test.groupby(['state', 'load', 'sev']).sample(5)
subsampled_test_df = subsampled_test_df.reset_index(drop = True)
subsampled_test_df.to_csv(exporting_dir + 'subsampled_test_df.csv')

In [None]:
# Features are the first window_len columns (the window samples); the target is the
# encoded state. Selecting the target by name rather than by position keeps it
# correct even if further columns are appended to the table later.
x_train = df_train.iloc[:, :window_len]
y_train = df_train['state_encoded']

x_test = df_test.iloc[:, :window_len]
y_test = df_test['state_encoded']

In [None]:
# Display (rows, columns) of the training features.
x_train.shape

In [None]:
# Display (rows, columns) of the test features.
x_test.shape

## Feature Scaling

In [None]:
def scaler(arr):
  """Standardise each row of a 2-D tensor by its own mean and standard deviation.

  The statistics are taken along ``dim = 1`` on a float view of ``arr`` and
  broadcast back over the columns; the result has the same shape as ``arr``.
  """
  as_float = arr.float()
  row_mean = as_float.mean(dim = 1, keepdim = True)
  row_std = as_float.std(dim = 1, keepdim = True)

  return (arr - row_mean) / row_std

# **Model Training**

## Function - Model Creator

In [None]:
class Classifier(torch.nn.Module):
    """Small 1-D CNN: Conv1d -> tanh -> AvgPool1d -> flatten -> Linear.

    All constructor arguments are optional; the defaults reproduce the original
    fixed network (5 filters of length 100, pooling window 50, 5 * 38 flattened
    features, 4 outputs), so ``Classifier()`` and its state dict are unchanged.

    Parameters
    ----------
    input_len : int
        Length of each single-channel input signal.
    n_classes : int
        Number of output logits.
    n_filters, kernel_size : int
        Output channels and kernel length of the convolution.
    pool_size : int
        Window (and stride) of the average pooling.

    Raises
    ------
    ValueError
        If the input is too short to leave at least one pooled sample.
    """

    def __init__(self, input_len = 2048, n_classes = 4, n_filters = 5,
                 kernel_size = 100, pool_size = 50):
        super().__init__()

        # Length left after the un-padded convolution and the non-overlapping
        # pooling; computed once so the flatten size is not a duplicated literal.
        pooled_len = (input_len - kernel_size + 1) // pool_size
        if pooled_len < 1:
            raise ValueError("input_len is too short for the given kernel_size/pool_size")
        self.flat_features = n_filters * pooled_len

        self.conv = nn.Conv1d(in_channels = 1, out_channels = n_filters, kernel_size = kernel_size)
        self.avgPool = nn.AvgPool1d(kernel_size = pool_size)
        self.fc = nn.Linear(self.flat_features, n_classes)

    def forward(self, x):
        """Map ``(batch, 1, input_len)`` signals to raw ``(batch, n_classes)`` logits."""
        z = self.conv(x)
        z = torch.tanh(z)
        z = self.avgPool(z)

        # Flatten channels x pooled length into one feature vector per sample.
        z = z.view(-1, self.flat_features)

        return self.fc(z)

In [None]:
# Instantiate the model on the selected device and print a torchsummary report
# for an input of size (1, 2048).
from torchsummary import summary

model = Classifier().to(device)
summary(model, (1, 2048))

In [None]:
!pip install torchview

In [None]:
# Draw the model's computation graph with torchview for input_size=(1, 2048)
# and display it as the cell output.
from torchview import draw_graph
model_graph = draw_graph(model, input_size=(1, 2048), device=device)
model_graph.visual_graph

## Creating, Compiling and Fitting the Model

In [None]:
# Split the training data once more into a fitting part and a validation part.
# Seeded (like the train/test split) so that repeated runs use the same rows.
x_train_train, x_train_validation, y_train_train, y_train_validation = train_test_split(
    x_train, y_train, test_size = 0.25, random_state = 42)

# Standardise each window, then insert the channel axis expected by Conv1d:
# (n_windows, window) -> (n_windows, 1, window). unsqueeze avoids hard-coding the
# number of rows, which depends on the dataset size and on the split.
x_train_train_scaled = scaler(torch.Tensor(x_train_train.to_numpy()).float()).unsqueeze(1)
x_train_validation_scaled = scaler(torch.Tensor(x_train_validation.to_numpy()).float()).unsqueeze(1)

# torch.autograd.Variable is a deprecated no-op wrapper, so plain tensors are used.
x_train_VAR = x_train_train_scaled.to(device)
y_train_VAR = torch.LongTensor(y_train_train.to_numpy()).to(device)
x_valid_VAR = x_train_validation_scaled.to(device)
y_valid_VAR = torch.LongTensor(y_train_validation.to_numpy()).to(device)

In [None]:
lr = 0.001
ep = 5000

model = Classifier().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr = lr,
                             weight_decay = lr / ep)

# Per-epoch histories, plotted after training.
losses = []
valid_losses = []
accs = []
valid_accs = []

# Full-batch training: each epoch is one optimizer step on the whole training set.
for epoch in range(ep):

  # validation step: a single forward pass, without building an autograd graph
  with torch.no_grad():
    valid_logits = model(x_valid_VAR)
    valid_loss = criterion(valid_logits, y_valid_VAR).item()
  valid_losses.append(valid_loss)
  valid_acc = accuracy_score(y_train_validation, np.argmax(valid_logits.cpu().numpy(), axis = 1))
  valid_accs.append(valid_acc)

  # training step: the same logits feed both the loss and the accuracy,
  # instead of running the model twice on the training set
  optimizer.zero_grad()
  train_logits = model(x_train_VAR)
  loss = criterion(train_logits, y_train_VAR)
  acc = accuracy_score(y_train_train, np.argmax(train_logits.detach().cpu().numpy(), axis = 1))
  accs.append(acc)
  losses.append(loss.item())
  loss.backward()
  optimizer.step()
  print(f"Epoch {epoch+1}, loss: {np.round(loss.item(), 4)}  , Vloss: {np.round(valid_loss, 4)}, acc: {np.round(acc, 4)}, Vacc: {np.round(valid_acc, 4)}")

In [None]:
# Plot the loss and accuracy histories side by side (training vs. validation).
fig, axes = plt.subplots(1, 2, figsize=(16,4))
fig.suptitle('Deep Learning Model Training Process')

panels = [
  ('Loss', losses, valid_losses),
  ('Accuracy', accs, valid_accs),
]
for ax, (metric, train_curve, valid_curve) in zip(axes, panels):
  ax.plot(train_curve, label=f'Training {metric}')
  ax.plot(valid_curve, label=f'Validation {metric}')
  ax.set_xlabel('Epochs')
  ax.set_ylabel(metric)
  ax.set_title(f'{metric} Vs. Epochs')
  ax.legend()
plt.show()

# **Model Evaluation**

In [None]:
# Standardise the held-out windows and add the channel axis:
# (n, window) -> (n, 1, window), without hard-coding the number of test rows.
x_test_scaled = scaler(torch.Tensor(x_test.to_numpy()))
x_test_VAR = x_test_scaled.unsqueeze(1).to(device)

# The predicted class is the argmax of the raw logits; softmax is monotonic, so it
# cannot change the winner and is left out (it was also called without `dim`).
with torch.no_grad():
  test_logits = model(x_test_VAR)
testing_acc = accuracy_score(y_test, np.argmax(test_logits.cpu().numpy(), axis = 1))
print('Testing Accuracy:   ', testing_acc)

In [None]:
# Predicted class ids are the argmax of the raw logits (softmax would not change
# the winner and was called without `dim`); no autograd graph is needed here.
with torch.no_grad():
  y_test_pred = np.argmax(model(x_test_VAR).cpu().numpy(), axis = 1)
# Decode the ids back to state names with the encoder fitted earlier.
y_test_pred_decoded = labelencoder.inverse_transform(y_test_pred)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Fix the label set to every class known to the encoder, so the matrix always has
# one row/column per class, and take the tick names from that same list. Deriving
# the names from the predictions would drop (and so mislabel) any class the model
# never predicted.
class_names = labelencoder.classes_
matrix = confusion_matrix(y_test, y_test_pred, labels = np.arange(len(class_names)))

ax = sns.heatmap(matrix, annot=True, fmt='d', cbar = True, square = True, cmap = 'Blues')
ax.set_xlabel("Predicted", fontsize=14, labelpad=20)
ax.xaxis.set_ticklabels(class_names)
ax.set_ylabel("Actual", fontsize=14, labelpad=20)
ax.yaxis.set_ticklabels(class_names)
ax.set_title("Confusion Matrix", fontsize=14, pad=20)
plt.show()

In [None]:
# Declare exporting_dir as the directory the trained model should be exported to.
# It is concatenated as-is with the file name, so include a trailing separator.
exporting_dir = ''
model_path = exporting_dir + 'lightCNN_timeClassifier_Pytorch_preprocessing_state_dict.pth'
torch.save(model.state_dict(), model_path)