# In [None]:
# ---------------------- KNN Tuning ----------------------
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
from sklearn.metrics import brier_score_loss
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score, cohen_kappa_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.preprocessing import label_binarize
from sklearn.utils import shuffle

# ---------------------- LOAD AND PREPROCESS DATA ----------------------
# One CSV per traffic class. `nrows` stops the parser after the requested
# number of records, so rows past the cap are never read into memory
# (previously the whole file was parsed and then row-sliced).
AttackFree = pd.read_csv('Attack_free new.csv', nrows=2369397)
DoS = pd.read_csv('DoS_Attack_new.csv', nrows=656578)
Fuzzy = pd.read_csv('Fuzzy_Attack_New.csv', nrows=591989)
Impersonation = pd.read_csv('Impersonation_Attack_New.csv', nrows=995471)

# Encode HEX features (columns)----------------------------
# Every listed column is overwritten in place with the integer codes produced
# by refitting the shared LabelEncoder on that column alone.
# NOTE(review): because the encoder is refit per frame and per column, the same
# raw value may map to different codes in different frames — confirm intended.
# NOTE(review): Impersonation also encodes column 0 while the other three
# frames start at column 1 — presumably a different file layout; verify.
label_encoder = LabelEncoder()

encode_plan = [
    (AttackFree, range(1, 16)),
    (DoS, range(1, 16)),
    (Fuzzy, range(1, 16)),
    (Impersonation, range(16)),
]

for frame, column_positions in encode_plan:
    for col in column_positions:
        raw_values = frame.iloc[:, col]
        frame.iloc[:, col] = label_encoder.fit_transform(raw_values)

# Assign labels
# Integer class code per capture file; these codes become the target `y`.
AttackFree['label'] = 1
DoS['label'] = 2
Fuzzy['label'] = 3
Impersonation['label'] = 4

# Combine all
# Stack the four frames under a fresh 0..n-1 index, then shuffle reproducibly.
df_all = pd.concat([AttackFree, DoS, Fuzzy, Impersonation], ignore_index=True)
df_all = shuffle(df_all, random_state=42)

# Select the features by name rather than by position: if the frames' column
# names do not line up, concat appends the extra columns after 'label', and a
# positional "everything but the last column" slice would then keep the target
# inside X. With identical columns in all frames the result is unchanged.
X = df_all.drop(columns='label').values
y = df_all['label'].values

# ---------------------- SPLIT AND BALANCE ----------------------
# Hold out 25% of the rows for testing, keeping the class proportions of `y`
# in both parts (stratified) and fixing the seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
    stratify=y,
)

# Learn the standardisation statistics from the training part only, then apply
# the same transform to both parts.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#-------------------------Balancing the training data------------------------
# Random oversampling of the scaled training set; the test set is left as is.
ros = RandomOverSampler(random_state=42)
balanced_parts = ros.fit_resample(X_train, y_train)
X_train_bal, y_train_bal = balanced_parts

# ---------------------- KNN HYPERPARAMETER TUNING ----------------------
# The oversampler is a pipeline step so that it is refit on the training part
# of every CV split. Oversampling the whole training set before the search
# places copies of the same row in both the training and the validation fold,
# which inflates the CV scores (a duplicate is its own nearest neighbour) and
# biases the search towards small k / distance weighting.
# Samplers in an imblearn Pipeline are applied during fit only, so
# grid.best_estimator_.predict(...) sees the data unchanged.
knn_pipeline = Pipeline([
    ('oversample', RandomOverSampler(random_state=42)),
    ('knn', KNeighborsClassifier()),
])

# Only parameters that change the predictions are searched. `algorithm` and
# `leaf_size` choose how neighbours are looked up (speed / memory), so they
# stay at their defaults instead of multiplying the grid by 12.
param_grid = {
    'knn__n_neighbors': [3, 5, 7, 9],
    'knn__weights': ['uniform', 'distance'],
    'knn__p': [1, 2],  # 1 = manhattan, 2 = euclidean (for minkowski)
}

grid = GridSearchCV(
    estimator=knn_pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    verbose=2,
    n_jobs=-1,
)

# Fit on the scaled, not yet oversampled training data; balancing happens
# inside the pipeline for each fold and for the final refit.
grid.fit(X_train, y_train)

# ---------------------- EVALUATION ----------------------
# Label codes and their display names, kept side by side in one place so the
# report rows and the confusion-matrix axes are tied to the codes explicitly
# instead of relying on the sorted order of whatever labels happen to occur.
class_ids = [1, 2, 3, 4]
labels = ['AttackFree', 'DoS', 'Fuzzy', 'Impersonation']

print("\n Best Parameters:")
print(grid.best_params_)

# Best configuration found by the search, used on the held-out test set.
best_knn = grid.best_estimator_

y_pred = best_knn.predict(X_test)

print("\n Classification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=labels))

# Rows are true classes, columns are predicted classes, both in class_ids order.
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
plt.title("Confusion Matrix - Tuned KNN")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.show()

# -------------------------- TPR and TNR -------------------------
# One-vs-rest rates per class, derived from the confusion matrix `cm`
# (row = true class, column = predicted class).
correct_per_class = cm.diagonal()
actual_per_class = cm.sum(axis=1)
predicted_per_class = cm.sum(axis=0)
total_samples = cm.sum()

TPR = []
TNR = []
for hits, n_actual, n_predicted in zip(correct_per_class, actual_per_class, predicted_per_class):
    misses = n_actual - hits              # false negatives
    false_alarms = n_predicted - hits     # false positives
    rejections = total_samples - (hits + misses + false_alarms)  # true negatives
    positives = hits + misses
    negatives = rejections + false_alarms
    # A rate is reported as 0 when its denominator is empty.
    TPR.append(hits / positives if positives != 0 else 0)
    TNR.append(rejections / negatives if negatives != 0 else 0)

print("\n TPR and TNR:")
for cls, tpr_value, tnr_value in zip(labels, TPR, TNR):
    print(f"{cls}:  TPR = {tpr_value:.4f},  TNR = {tnr_value:.4f}")

# Overall scores on the test set, printed in a fixed order.
# NOTE(review): for single-label multiclass data with all classes included,
# micro-averaged precision/recall/F1 coincide with accuracy — confirm that
# micro averaging is what should be reported here.
micro = {'average': 'micro'}
summary_rows = (
    ("\n Accuracy:", accuracy_score, {}),
    (" Precision:", precision_score, micro),
    (" Recall:", recall_score, micro),
    (" F1 Score:", f1_score, micro),
    (" Cohen Kappa Score:", cohen_kappa_score, {}),
)
for caption, metric_fn, extra_kwargs in summary_rows:
    print(caption, metric_fn(y_test, y_pred, **extra_kwargs))

