# In [None]:
# =====TUNED SVM Model =====

import pandas as pd
import numpy as np
import time
from datetime import datetime

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.svm import SVC

from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    precision_score, recall_score, f1_score, cohen_kappa_score,
    hinge_loss
)
import seaborn as sns
import matplotlib.pyplot as plt

# ---------------- DATA LOADING ----------------
# One (csv path, row cap) pair per traffic class. Every file is read in full
# and then cut down to its leading `row cap` rows; a file with fewer rows
# than its cap simply contributes everything it has.
capture_sources = (
    ('Attack_free new.csv', 355409),
    ('DoS_Attack_new.csv', 98486),
    ('Fuzzy_Attack_New.csv', 88798),
    ('Impersonation_Attack_New.csv', 149320),
)
AttackFree, DoS, Fuzzy, Impersonation = (
    pd.read_csv(csv_path)[0:row_cap] for csv_path, row_cap in capture_sources
)

# Per-class row counts after truncation, in the same order the frames are
# concatenated below.
class_frames = [AttackFree, DoS, Fuzzy, Impersonation]
n1, n2, n3, n4 = (len(frame) for frame in class_frames)

# ------------ SINGLE COMBINED DATAFRAME ------------
# Stack the four classes row-wise and renumber the index from 0.
X_df = pd.concat(class_frames, axis=0, ignore_index=True)

# ----------------- PREPROCESSING -----------------
# Turn every column of X_df into numbers, one column at a time:
#   * parse the column with pd.to_numeric; entries that cannot be parsed
#     become NaN;
#   * if more than half of the parsed entries are NaN, treat the column as
#     categorical and replace it with LabelEncoder codes of its string form;
#   * otherwise keep the parsed numbers and fill the NaNs with the median of
#     the parsed column.
# NOTE(review): encoders and medians are computed on the full concatenated
# frame, i.e. before the train/test split — confirm that is intended.
for feature in X_df.columns:
    parsed = pd.to_numeric(X_df[feature], errors='coerce')

    if parsed.isna().mean() > 0.5:
        # Mostly non-numeric: integer-encode the raw values as strings.
        X_df[feature] = LabelEncoder().fit_transform(X_df[feature].astype(str))
        continue

    # Mostly numeric: keep parsed values, patch the gaps with the median.
    X_df[feature] = parsed.fillna(parsed.median())

# ----------------------- LABELLING -----------------------
# X_df keeps the concatenation order (AttackFree, DoS, Fuzzy, Impersonation),
# so the label vector is one contiguous run per class: n1 ones, then n2 twos,
# n3 threes and n4 fours.
y = np.repeat(np.array([1, 2, 3, 4], dtype=int), [n1, n2, n3, n4])

# ------ CLASS WEIGHTS ------
# 'balanced' weights are computed from the full label vector y (before the
# split) and stored as a {class label: weight} mapping for the SVC.
classes = np.unique(y)
class_weights_array = compute_class_weight(class_weight='balanced', classes=classes, y=y)
class_weights_dict = {
    label: weight for label, weight in zip(classes, class_weights_array)
}
print("Class Weights:", class_weights_dict)

# ------ STRATIFIED TRAIN/TEST SPLIT ------
# 75 % train / 25 % test, stratified on y so both parts keep the class
# proportions; the fixed seed makes the split reproducible.
split_options = dict(test_size=0.25, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X_df.values, y, **split_options)

# ----------- SVM MODEL: SVC with tuned parameters -----------
# Features are standardized and then fed to an RBF-kernel SVC. The scaler is
# part of the pipeline, so it is fitted on whatever data clf.fit() receives.
# NOTE(review): scikit-learn documents SVC fit time as growing at least
# quadratically with the number of samples; with the row caps used when
# loading the CSVs, training may be very slow — confirm this is acceptable.
svm_settings = {
    'kernel': 'rbf',
    'C': 100,
    'gamma': 'scale',
    'class_weight': class_weights_dict,  # per-class penalty multipliers
    'probability': False,
    'random_state': 42,
    'cache_size': 1000,
    'tol': 1e-3,
}
clf = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svm', SVC(**svm_settings)),
])

# --------- TRAINING ------------
# Fit the whole pipeline (scaler + SVC) on the training split and report the
# elapsed wall-clock time as a timedelta.
t0 = datetime.now()
clf.fit(X_train, y_train)
t1 = datetime.now()
training_duration = t1 - t0
print(' Training Duration:', training_duration)

# ---------- TESTING ----------
# Predict the full test split; y_pred feeds every metric computed afterwards.
t2 = datetime.now()
y_pred = clf.predict(X_test)
t3 = datetime.now()
testing_duration = t3 - t2
print(' Testing Duration:', testing_duration)

# === INFERENCE LATENCY ------------
# Time a single batched predict() call on up to the first 1000 test rows and
# report the total time plus the per-sample average (batch time divided by
# the number of rows, not the latency of a one-row call).
#
# time.perf_counter() is used instead of time.time(): it is monotonic and
# uses the highest-resolution clock available, so the measured interval
# cannot be distorted by system clock adjustments or a coarse wall clock.
sample_n = min(1000, len(X_test))
sample_input = X_test[:sample_n]
ts = time.perf_counter()
_ = clf.predict(sample_input)
te = time.perf_counter()
elapsed = te - ts
print(f"\n Inference Time for {sample_n} samples: {elapsed:.4f} s")
print(f" Avg Latency per Sample: {elapsed / sample_n * 1000:.4f} ms")

# --------- EVALUATION METRICS ---------
# Per-class precision/recall/F1 table, followed by summary scores.
print("\n Classification Report (Tuned SVM):")
print(classification_report(y_test, y_pred, target_names=['AttackFree', 'DoS', 'Fuzzy', 'Impersonation']))

print(f"Accuracy:  {accuracy_score(y_test, y_pred):.4f}")

# Micro-averaged scores are kept for continuity with earlier runs. On a
# single-label multiclass problem with all labels included they are always
# equal to the accuracy above, so they carry no extra information.
print(f" Precision: {precision_score(y_test, y_pred, average='micro'):.4f}")
print(f" Recall:    {recall_score(y_test, y_pred, average='micro'):.4f}")
print(f" F1 Score:  {f1_score(y_test, y_pred, average='micro'):.4f}")

# Macro averages weight every class equally regardless of its size, so a
# weak minority class is visible here even when overall accuracy is high.
print(f" Macro Precision: {precision_score(y_test, y_pred, average='macro'):.4f}")
print(f" Macro Recall:    {recall_score(y_test, y_pred, average='macro'):.4f}")
print(f" Macro F1 Score:  {f1_score(y_test, y_pred, average='macro'):.4f}")

print(f" Cohen Kappa: {cohen_kappa_score(y_test, y_pred):.4f}")

# ----------- CONFUSION MATRIX + TPR/TNR -------------------
# Rows are true classes and columns are predicted classes, both in the fixed
# label order 1..4 so they line up with the display names in `labels`.
labels = ['AttackFree', 'DoS', 'Fuzzy', 'Impersonation']
cm = confusion_matrix(y_test, y_pred, labels=[1, 2, 3, 4])
cm_frame = pd.DataFrame(cm, index=labels, columns=labels)

# Annotated heatmap of the raw counts.
fig, ax = plt.subplots(figsize=(7, 5))
sns.heatmap(cm_frame, annot=True, cmap='Blues', fmt='d', ax=ax)
ax.set_title("Confusion Matrix - Tuned SVM (RBF, C=100, gamma='scale')")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
fig.tight_layout()
plt.show()

# Per-class true-positive rate (recall) and true-negative rate (specificity),
# derived one-vs-rest from the confusion matrix `cm`:
#   row total    = samples that truly belong to the class (tp + fn)
#   column total = samples predicted as the class          (tp + fp)
# A rate whose denominator is zero is reported as 0.
grand_total = cm.sum()
row_totals = cm.sum(axis=1)
col_totals = cm.sum(axis=0)

TPR, TNR = [], []
for hits, actual, predicted in zip(cm.diagonal(), row_totals, col_totals):
    false_alarms = predicted - hits      # fp
    negatives = grand_total - actual     # tn + fp
    true_rejects = negatives - false_alarms  # tn
    TPR.append(hits / actual if actual else 0)
    TNR.append(true_rejects / negatives if negatives else 0)

print("\n TPR (Recall) and TNR (Specificity) per class:")
for cls, tpr, tnr in zip(labels, TPR, TNR):
    print(f"{cls}:   TPR = {tpr:.4f}    TNR = {tnr:.4f}")

# ----- Hinge loss from the pipeline's decision scores
# The pipeline scales X_test with the fitted scaler and then calls the SVC's
# decision_function, giving one score column per class for hinge_loss.
scores = clf.decision_function(X_test)
test_hinge = hinge_loss(y_test, scores, labels=[1, 2, 3, 4])
print(f"\n Hinge Loss: {test_hinge:.4f}")
