===========================================================================================================================
file:       benchmarks.ipynb
version:    21.7.2023
title:      BAC2 FH Salzburg: Federated Learning für Predictive Maintenance in Bereichen mit hohen Datenschutzanforderungen
author:     Alexander Pachler
===========================================================================================================================

===========================================================================================================================
Part 1:     Model based on complete dataset
===========================================================================================================================

In [None]:
# Import the libraries and load the dataset from the CSV file
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('predictive_maintenance.csv')
# Preview the first rows
df.head()

In [None]:
# Check dataset: descriptive statistics (count, mean, std, min/max, quartiles)
df.describe()

In [None]:
# Check dataset: column names, dtypes and non-null counts
df.info()

In [None]:
# Convert/create "Failure Type Cat" as category:
# encode the "Failure Type" labels as integer category codes. This new column is
# the target (y) of all classifiers trained below.
df["Failure Type Cat"] = df["Failure Type"].astype('category').cat.codes
# Preview the frame including the new column
df.head()

In [None]:
# Split dataset in training (80 %) and test (20 %) part.
# The fixed random_state makes the shuffle reproducible, so the accuracy and the
# confusion matrix computed below stay the same across "Restart & Run All".
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(df, train_size=0.8, random_state=0)
# Size (rows, columns) of the training split
df_train.shape

In [None]:
# Size (rows, columns) of the test split
df_test.shape

In [None]:
# Check which failure types are present in the dataset:
# count the non-null 'Target' entries per failure type name
grp = df.groupby("Failure Type").count()
grp['Target']

In [None]:
# Same check on the encoded column: count the non-null 'Target' entries per
# failure type code (note: this overwrites `grp` from the previous cell)
grp = df.groupby("Failure Type Cat").count()
grp['Target']

In [None]:
# Set classifier: random forest with depth-limited trees and a fixed seed
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=2, random_state=0)

In [None]:
# Train model: the five listed columns are the features (X),
# the encoded failure type is the output (y)
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
clf.fit(X=df_train[feature_cols], y=df_train['Failure Type Cat'])

In [None]:
# Predict the failure type codes of the test rows with the trained model
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
y_pred = clf.predict(X=df_test[feature_cols])
# Show the first 100 predictions
y_pred[:100]

In [None]:
# Check accuracy of model: share of test rows whose predicted code equals the true code
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_true=df_test['Failure Type Cat'], y_pred=y_pred)
print(f'The accuracy is {acc*100:.2f}%.')

In [None]:
# Create a confusion matrix
# All six class codes are passed explicitly: without `labels`, scikit-learn only
# uses the codes that occur in the test labels or the predictions. A rare failure
# type missing from both would shrink the matrix, and it would no longer match
# the six display labels used when the matrix is plotted.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(df_test['Failure Type Cat'], y_pred, labels=list(range(6)))
cm

In [None]:
# Visualize the confusion matrix
from sklearn.metrics import ConfusionMatrixDisplay
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Heat Dissipation Failure', 'No Failure', 'Overstrain Failure', 'Power Failure', 'Random Failures', 'Tool Wear Failure'])
disp.plot(cmap='Greys')
# Title the figure so it can be told apart from the per-part matrices in Part 2
disp.ax_.set_title('Confusion matrix - complete dataset')
# Rotate the long class names on the x axis so they do not overlap
plt.xticks(rotation=90)
plt.show()

===========================================================================================================================
Part 2:     Models based on the complete dataset split into three equal parts, each trained individually
===========================================================================================================================

In [None]:
# Randomly split the dataset into approx. three equal parts - done only once, therefore commented out!
# df_part1 = df.sample(frac=1/3, random_state=1)
# df_part2 = df.drop(df_part1.index).sample(frac=1/2, random_state=1)
# df_part3 = df.drop(df_part1.index).drop(df_part2.index)

In [None]:
# Save the split datasets to CSV (for re-use) - done only once, therefore commented out!
# df_part1.to_csv('df_part1.csv')
# df_part2.to_csv('df_part2.csv')
# df_part3.to_csv('df_part3.csv')

In [None]:
# Re-import the randomly split data frames (equal sources for various tests).
# The export cell above calls to_csv() without index=False, so the first CSV
# column holds the original row index. It is read back as the index instead of
# turning into a stray 'Unnamed: 0' data column.
df_part1 = pd.read_csv('df_part1.csv', index_col=0)
df_part2 = pd.read_csv('df_part2.csv', index_col=0)
df_part3 = pd.read_csv('df_part3.csv', index_col=0)

In [None]:
# Preview the first rows of part 1
df_part1.head()

In [None]:
# Preview the first rows of part 2
df_part2.head()

In [None]:
# Preview the first rows of part 3
df_part3.head()

In [None]:
# Split part 1 in training (80 %) and test (20 %) part.
# The fixed random_state keeps the split, and therefore the reported metrics, reproducible.
df_train1, df_test1 = train_test_split(df_part1, train_size=0.8, random_state=0)
# Size (rows, columns) of the training split
df_train1.shape

In [None]:
# Size (rows, columns) of the test split of part 1
df_test1.shape

In [None]:
# Split part 2 in training (80 %) and test (20 %) part.
# The fixed random_state keeps the split, and therefore the reported metrics, reproducible.
df_train2, df_test2 = train_test_split(df_part2, train_size=0.8, random_state=0)
# Size (rows, columns) of the training split
df_train2.shape

In [None]:
# Size (rows, columns) of the test split of part 2
df_test2.shape

In [None]:
# Split part 3 in training (80 %) and test (20 %) part.
# The fixed random_state keeps the split, and therefore the reported metrics, reproducible.
df_train3, df_test3 = train_test_split(df_part3, train_size=0.8, random_state=0)
# Size (rows, columns) of the training split
df_train3.shape

In [None]:
# Size (rows, columns) of the test split of part 3
df_test3.shape

In [None]:
# One random forest classifier per data part, each with the same settings
# (fixed seed, depth-limited trees) as the classifier for the complete dataset
clf1, clf2, clf3 = (RandomForestClassifier(random_state=0, max_depth=2) for _ in range(3))

In [None]:
# Train model 1/3: the five listed columns are the features (X),
# the encoded failure type is the output (y)
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
clf1.fit(X=df_train1[feature_cols], y=df_train1['Failure Type Cat'])

In [None]:
# Predict the failure type codes of the part 1 test rows with model 1/3
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
y_pred1 = clf1.predict(X=df_test1[feature_cols])
# Show the first 100 predictions
y_pred1[:100]

In [None]:
# Train model 2/3: the five listed columns are the features (X),
# the encoded failure type is the output (y)
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
clf2.fit(X=df_train2[feature_cols], y=df_train2['Failure Type Cat'])

In [None]:
# Predict the failure type codes of the part 2 test rows with model 2/3
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
y_pred2 = clf2.predict(X=df_test2[feature_cols])
# Show the first 100 predictions
y_pred2[:100]

In [None]:
# Train model 3/3: the five listed columns are the features (X),
# the encoded failure type is the output (y)
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
clf3.fit(X=df_train3[feature_cols], y=df_train3['Failure Type Cat'])

In [None]:
# Predict the failure type codes of the part 3 test rows with model 3/3
feature_cols = [
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
]
y_pred3 = clf3.predict(X=df_test3[feature_cols])
# Show the first 100 predictions
y_pred3[:100]

In [None]:
# Create a confusion matrix for part 1
# All six class codes are passed explicitly: without `labels`, a failure type that
# is missing from both the test labels and the predictions of this part would
# shrink the matrix, and it would no longer match the six display labels used
# when the matrix is plotted.
cm1 = confusion_matrix(df_test1['Failure Type Cat'], y_pred1, labels=list(range(6)))
cm1

In [None]:
# Create a confusion matrix for part 2
# All six class codes are passed explicitly: without `labels`, a failure type that
# is missing from both the test labels and the predictions of this part would
# shrink the matrix, and it would no longer match the six display labels used
# when the matrix is plotted.
cm2 = confusion_matrix(df_test2['Failure Type Cat'], y_pred2, labels=list(range(6)))
cm2

In [None]:
# Create a confusion matrix for part 3
# All six class codes are passed explicitly: without `labels`, a failure type that
# is missing from both the test labels and the predictions of this part would
# shrink the matrix, and it would no longer match the six display labels used
# when the matrix is plotted.
cm3 = confusion_matrix(df_test3['Failure Type Cat'], y_pred3, labels=list(range(6)))
cm3

In [None]:
# Visualize the confusion matrix of part 1
disp1 = ConfusionMatrixDisplay(confusion_matrix=cm1, display_labels=['Heat Dissipation Failure', 'No Failure', 'Overstrain Failure', 'Power Failure', 'Random Failures', 'Tool Wear Failure'])
disp1.plot(cmap='Greys')
# Title the figure so the three per-part matrices can be told apart
disp1.ax_.set_title('Confusion matrix - part 1')
# Rotate the long class names on the x axis so they do not overlap
plt.xticks(rotation=90)
plt.show()

In [None]:
# Visualize the confusion matrix of part 2
disp2 = ConfusionMatrixDisplay(confusion_matrix=cm2, display_labels=['Heat Dissipation Failure', 'No Failure', 'Overstrain Failure', 'Power Failure', 'Random Failures', 'Tool Wear Failure'])
disp2.plot(cmap='Greys')
# Title the figure so the three per-part matrices can be told apart
disp2.ax_.set_title('Confusion matrix - part 2')
# Rotate the long class names on the x axis so they do not overlap
plt.xticks(rotation=90)
plt.show()

In [None]:
# Visualize the confusion matrix of part 3
disp3 = ConfusionMatrixDisplay(confusion_matrix=cm3, display_labels=['Heat Dissipation Failure', 'No Failure', 'Overstrain Failure', 'Power Failure', 'Random Failures', 'Tool Wear Failure'])
disp3.plot(cmap='Greys')
# Title the figure so the three per-part matrices can be told apart
disp3.ax_.set_title('Confusion matrix - part 3')
# Rotate the long class names on the x axis so they do not overlap
plt.xticks(rotation=90)
plt.show()

In [None]:
# Check accuracy of the three per-part models and compare them with the model
# trained on the complete dataset (`acc` is computed in Part 1)
from sklearn.metrics import accuracy_score
acc1 = accuracy_score(y_true=df_test1['Failure Type Cat'], y_pred=y_pred1)
acc2 = accuracy_score(y_true=df_test2['Failure Type Cat'], y_pred=y_pred2)
acc3 = accuracy_score(y_true=df_test3['Failure Type Cat'], y_pred=y_pred3)

# Report all four accuracies as percentages
print(f'The accuracy of the complete ds is {acc*100:.2f}%.')
print(f'The accuracy of part 1 is {acc1*100:.2f}%.')
print(f'The accuracy of part 2 is {acc2*100:.2f}%.')
print(f'The accuracy of part 3 is {acc3*100:.2f}%.')