# In [None]:
# Tuning SVM with GridSearchCV

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score
import seaborn as sns
import matplotlib.pyplot as plt
import time

# ------------------ LOAD DATA ------------------
# Each file contributes at most a fixed number of leading rows. Passing the cap
# as `nrows` lets the parser stop there instead of loading the whole file and
# slicing afterwards; the result is also a standalone frame rather than a slice
# of a larger one, so assigning to its columns later does not raise
# SettingWithCopyWarning.
# NOTE(review): column dtypes are now inferred from the rows actually read;
# confirm no file has more rows than its cap with differently-typed values.
AttackFree = pd.read_csv("Attack_free new.csv", nrows=2369397)
DoS = pd.read_csv("DoS_Attack_new.csv", nrows=656578)
Fuzzy = pd.read_csv("Fuzzy_Attack_New.csv", nrows=591989)
Impersonation = pd.read_csv("Impersonation_Attack_New.csv", nrows=995471)

# ------------------ LABEL ENCODING ------------------
def encode_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a label-encoded copy of ``df``.

    Every column is cast to ``str`` and replaced by the integer codes of a
    ``LabelEncoder`` fitted on that column alone.

    Args:
        df: Frame to encode. It is left unmodified.

    Returns:
        A new DataFrame with the same columns and index as ``df`` holding the
        integer codes.

    Note:
        Codes are derived only from the values present in ``df``, so frames
        encoded by separate calls do not share a code mapping.
    """
    # Build a new frame instead of assigning into `df`: callers may pass a
    # slice of another frame, and writing into it mutates the caller's data
    # and can raise SettingWithCopyWarning. A fresh encoder per column makes
    # it explicit that no fitted state is shared between columns.
    return pd.DataFrame(
        {
            col: LabelEncoder().fit_transform(df[col].astype(str))
            for col in df.columns
        },
        index=df.index,
    )

# Encode the four frames together. encode_dataframe derives its integer codes
# from the values of the frame it is given, so encoding each class file on its
# own would map the same raw value to a different integer per class and make
# the stacked feature columns incomparable.
_raw_frames = (AttackFree, DoS, Fuzzy, Impersonation)

# Stack by column position so that differing headers between files cannot
# misalign columns and a column-count mismatch still raises.
# NOTE(review): this holds all raw rows in one array at once; confirm the
# extra peak memory is acceptable.
_stacked = pd.DataFrame(
    np.concatenate([frame.values for frame in _raw_frames], axis=0)
)
Dataset = encode_dataframe(_stacked).values

# Rebind the per-class names to their (now consistently encoded) rows.
_encoded_frames = []
_start = 0
for _frame in _raw_frames:
    _stop = _start + len(_frame)
    _encoded_frames.append(
        pd.DataFrame(Dataset[_start:_stop], columns=_frame.columns)
    )
    _start = _stop
AttackFree, DoS, Fuzzy, Impersonation = _encoded_frames

# ------------------ LABEL ASSIGNMENT ------------------
# One integer class id per row, in the same order the rows were stacked.
label1 = [1] * len(AttackFree)
label2 = [2] * len(DoS)
label3 = [3] * len(Fuzzy)
label4 = [4] * len(Impersonation)

label = np.concatenate((label1, label2, label3, label4), axis=0)

# ------------------ CLASS WEIGHTS ------------------
# 'balanced' weights for every distinct value in `label`, keyed by that value.
unique_classes = np.unique(label)
class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=unique_classes,
    y=label,
)
class_weights_dict = {
    class_id: weight
    for class_id, weight in zip(unique_classes, class_weights_array)
}
print("Class Weights:", class_weights_dict)

# ------------------ TRAIN-TEST SPLIT + SCALING ------------------
# 75/25 split with a fixed seed. The classes have different sizes, so stratify
# on the label to keep each class at the same proportion in both parts.
X_Train, X_Test, Y_Train, Y_Test = train_test_split(
    Dataset,
    label,
    test_size=0.25,
    random_state=42,
    stratify=label,
)

# Fit the scaler on the training part only, then apply that same transform to
# the test part.
scaler = StandardScaler()
X_Train = scaler.fit_transform(X_Train)
X_Test = scaler.transform(X_Test)

# ------------------ SVM TUNING ------------------
# Search space: 4 values of C x 4 gamma settings x 3 kernels = 48 candidates,
# each cross-validated with 3 folds.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=['scale', 'auto', 0.01, 0.001],
    kernel=['rbf', 'poly', 'sigmoid'],
)

# NOTE(review): probability=True adds probability calibration to every fit,
# and nothing in this section calls predict_proba -- confirm it is needed.
# NOTE(review): kernel SVC training cost grows quickly with the number of
# rows; confirm this search is feasible on the full training set.
svm_base = SVC(probability=True, class_weight=class_weights_dict)

grid = GridSearchCV(
    estimator=svm_base,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    verbose=2,
    n_jobs=-1,
)

# Run the grid search on the training split and time the whole search.
print("\n Tuning SVM...")
start_train = time.time()
grid.fit(X_Train, Y_Train)
end_train = time.time()

train_seconds = end_train - start_train
print(f"\n Training Complete: {train_seconds:.2f} seconds")
print(" Best Params:", grid.best_params_)

# Keep the estimator GridSearchCV exposes for the best parameter combination.
best_svm = grid.best_estimator_

# ------------------ TESTING ------------------
# Time one prediction pass over the entire test split.
start_test = time.time()
Y_Pred = best_svm.predict(X_Test)
end_test = time.time()

test_seconds = end_test - start_test
print(f"\n Testing Time: {test_seconds:.2f} seconds")

# ------------------ LATENCY ------------------
# Time a prediction over up to 1000 test rows and report the per-row average.
sample_input = X_Test[:1000]
# The slice is shorter than 1000 when the test split is small, so use its real
# length for both the message and the average instead of a hard-coded 1000.
n_samples = len(sample_input)

# perf_counter is a monotonic, high-resolution clock meant for measuring short
# intervals; time.time() can jump if the system clock is adjusted.
start_inf = time.perf_counter()
_ = best_svm.predict(sample_input)
end_inf = time.perf_counter()

elapsed_inf = end_inf - start_inf
avg_latency_ms = elapsed_inf / n_samples * 1000  # seconds per row -> ms
print(f"\n Inference Time ({n_samples} samples): {elapsed_inf:.4f} sec")
print(f" Avg Latency per Sample: {avg_latency_ms:.4f} ms")

# ------------------ METRICS ------------------
# Numeric class ids used in the label vector and their display names, in the
# same order. Passing the ids explicitly keeps each name attached to the right
# class even if some class is absent from Y_Test and Y_Pred.
class_ids = [1, 2, 3, 4]
class_names = ['AttackFree', 'DoS', 'Fuzzy', 'Impersonation']

print("\n Classification Report (Tuned SVM):")
print(
    classification_report(
        Y_Test, Y_Pred, labels=class_ids, target_names=class_names
    )
)

# For single-label multiclass predictions, micro-averaged precision, recall
# and F1 are all equal to accuracy, so they add no information. Macro
# averaging (unweighted mean over classes) reflects per-class performance.
print(f" Accuracy: {accuracy_score(Y_Test, Y_Pred):.4f}")
print(f" Precision: {precision_score(Y_Test, Y_Pred, average='macro'):.4f}")
print(f" Recall: {recall_score(Y_Test, Y_Pred, average='macro'):.4f}")
print(f" F1 Score: {f1_score(Y_Test, Y_Pred, average='macro'):.4f}")
print(f" Cohen Kappa: {cohen_kappa_score(Y_Test, Y_Pred):.4f}")

# ------------------ CONFUSION MATRIX ------------------
labels = ['AttackFree', 'DoS', 'Fuzzy', 'Impersonation']
# Pin the row/column order to class ids 1-4 (the ids assigned to the four
# datasets, in the same order as `labels`). Without `labels=`, the matrix only
# covers classes that occur in Y_Test or Y_Pred, so it could be smaller than
# 4x4 and fail to line up with the four names below.
cm = confusion_matrix(Y_Test, Y_Pred, labels=[1, 2, 3, 4])

# Rows are true classes, columns are predicted classes.
cm_frame = pd.DataFrame(cm, index=labels, columns=labels)
sns.heatmap(cm_frame, annot=True, cmap='Blues', fmt='d')
plt.title("Confusion Matrix - Tuned SVM")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.show()

# ------------------ TPR & TNR ------------------
# One-vs-rest true-positive and true-negative rates for each class, read off
# the confusion matrix `cm` (row = true class, column = predicted class).
TPR = []
TNR = []

grand_total = cm.sum()
for idx, _class_name in enumerate(labels):
    hits = cm[idx, idx]
    missed = cm[idx, :].sum() - hits          # true class idx, predicted other
    false_alarms = cm[:, idx].sum() - hits    # predicted idx, true class other
    correct_rejections = grand_total - (hits + missed + false_alarms)

    positives = hits + missed
    negatives = correct_rejections + false_alarms

    # A rate with an empty denominator is reported as 0.
    TPR.append(hits / positives if positives else 0)
    TNR.append(correct_rejections / negatives if negatives else 0)

print("\n TPR (Recall) and TNR (Specificity) per class:")
for cls, sensitivity, specificity in zip(labels, TPR, TNR):
    print(f"{cls}:\n  TPR = {sensitivity:.4f}\n  TNR = {specificity:.4f}")
