This notebook builds and trains a k-nearest-neighbors (KNN) classifier on sequence-encoded data.

In [1]:
import pandas as pd

# Critical alarm types; each one is stored in its own CSV file named "<type>.csv".
critical_alarm_types = [7, 15, 16, 21, 33, 56, 68, 95, 1000, 1001]

# Replace this directory according to the data you want to read
# (All, Unique Samples, Random Samples).
SEQUENCES_DIR = "../Data/Final Data/Train/Random Samples/Sequences/"

# Read every per-type file first and concatenate once. Growing a frame with
# pd.concat inside the loop re-copies all accumulated rows on each iteration
# and starts from an empty DataFrame, which recent pandas releases warn about.
type_frames = [
    pd.read_csv(SEQUENCES_DIR + str(alarm_type) + ".csv")
    for alarm_type in critical_alarm_types
]
df = pd.concat(type_frames, ignore_index=True)

# Features are every column except the label column 'y'.
X = df.drop(columns=['y']).values
y = df['y'].values

To upsample the data with SMOTE, run the following cell. Otherwise, skip it and continue with the cell after it.

In [None]:
# Optional: balance the dataset by upsampling with SMOTE
#
# NOTE(review): this resamples the full dataset before the train/test split
# performed in the next cell, so resampled data also ends up in the test set.
# Consider applying SMOTE to the training portion only.

from imblearn.over_sampling import SMOTE

# Fixed seed so the resampling is reproducible across runs.
X, y = SMOTE(random_state=42).fit_resample(X, y)

In [None]:
# Hold out 33% of the samples as a test set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,  # fixed seed so the split is reproducible
)

In [None]:
# Build the KNN classifier and fit it on the training split

from sklearn.neighbors import KNeighborsClassifier

# Hyperparameters: 15 neighbours, Manhattan distance, n_jobs=-1 for parallel search.
knn_params = {
    'n_neighbors': 15,
    'metric': 'manhattan',
    'n_jobs': -1,
}
knn = KNeighborsClassifier(**knn_params)
knn.fit(X_train, y_train)

In [None]:
import joblib

# Persist the fitted classifier to disk so it can be reloaded without retraining.
# NOTE(review): the file name says "Unique" while the data-loading cell reads
# from the "Random Samples" directory — confirm the name matches the data used.
model_path = 'KNN_Unique_Sequence.joblib'
joblib.dump(knn, model_path)

In [None]:
# Predict labels for the held-out test split with the fitted KNN model;
# y_pred is consumed by the evaluation cells that follow.

y_pred = knn.predict(X_test)

In [None]:
# Text summary of the classification metrics on the test split

from sklearn.metrics import classification_report

report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Plot the confusion matrix for the test split

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Use the classifier's own label ordering for both the matrix and the tick labels.
class_labels = knn.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Draw onto an explicitly created figure/axes pair.
fig, ax = plt.subplots()
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(ax=ax)
plt.show()