This notebook runs a grid search (`GridSearchCV`) to find the hyperparameters that lead to the best model performance.

In [None]:
import pandas as pd

# Possible critical alarm types; one CSV file is read per type
critical_alarm_types = [7,15,16,21,33,56,68,95,1000,1001]

# Replace the path according to the data you want to read (All, Unique Samples, Random Samples)
# and the data encoding method (sequence, count-based)
DATA_DIR = "../Data/Train/Random Samples/Count-based/"

# Collect the per-type frames first and concatenate once at the end:
# growing a DataFrame with pd.concat inside the loop re-copies all rows on every iteration.
type_frames = []
for i in critical_alarm_types:
    type_df = pd.read_csv(DATA_DIR + str(i) + "_countbased.csv")
    type_frames.append(type_df)

# ignore_index=True renumbers the rows 0..n-1 across all alarm types
df = pd.concat(type_frames, ignore_index=True)

# Feature matrix: every column except the target column 'y'
X = df.drop(columns=['y']).values
# Target vector
y = df['y'].values

In [None]:
# Train-test-split: hold out 33% of the samples; the fixed seed keeps the split repeatable
from sklearn.model_selection import train_test_split

split_options = {"test_size": 0.33, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
import joblib

# Define the scoring metric: micro-averaged F1.
# The scorer is built explicitly and passed to the search below, so the object
# defined here is the metric that is actually optimised.
scorer = make_scorer(f1_score, average="micro")

# Estimator to tune; the searched hyperparameters are set by the grid below
model = KNeighborsClassifier()

# Candidate values: every combination of neighbour count and distance metric is evaluated
grid_space={'n_neighbors':[5,7,9,11,13,15],
            'metric':['euclidean','manhattan']
            }

# Exhaustive search with 2-fold cross-validation; verbose=10 requests detailed progress output
grid = GridSearchCV(model,param_grid=grid_space,cv=2,scoring=scorer,verbose=10)

# Fit on the training split only; the test split stays untouched for later evaluation
model_grid = grid.fit(X_train,y_train)

In [None]:
# Report the winning hyperparameter combination and its mean cross-validated score

print(f"Best hyperparameters are: {model_grid.best_params_!s}")
print(f"Best score is: {model_grid.best_score_!s}")