In [None]:
"""
Notebook: One-Class SVM Model Training for INADS
-------------------------------------------------
Objective:
- Train a One-Class SVM model for anomaly detection in network traffic.
- Evaluate model performance using accuracy, confusion matrix, and ROC-AUC.
- Compare results with XGBoost and Isolation Forest.

Dataset:
- Preprocessed train and test sets (train_set_fixed.csv, test_set_fixed.csv).
- Multi-class labels are collapsed into a binary target: normal (1) vs anomaly (-1).

"""

# ===============================
# Step 1: Import Necessary Libraries
# ===============================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import OneClassSVM
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, roc_curve

# ===============================
# Step 2: Load the Train and Test Sets
# ===============================

# Locations of the preprocessed splits (absolute, machine-specific paths).
train_path = r"C:\Users\S569652\Documents\INADS\data\train_set_fixed.csv"
test_path = r"C:\Users\S569652\Documents\INADS\data\test_set_fixed.csv"

# Read both CSVs into memory, train first, then test.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

# The "Label" column is the target; every other column is a feature.
y_train = train_df["Label"]
X_train = train_df.drop(columns=["Label"])

y_test = test_df["Label"]
X_test = test_df.drop(columns=["Label"])

# Sanity check: feature matrix and label vector dimensions for each split.
print(f"Train Set Shape: {X_train.shape}, Labels: {y_train.shape}")
print(f"Test Set Shape: {X_test.shape}, Labels: {y_test.shape}")

# ===============================
# Step 3: Convert Multi-Class Labels into Binary (Normal vs Anomaly)
# ===============================

# Any class other than `normal_label` is treated as an anomaly.
normal_label = "Benign"

# Encode both splits the same way: +1 for normal traffic, -1 for anomalies.
y_train_binary, y_test_binary = (
    np.where(labels == normal_label, 1, -1) for labels in (y_train, y_test)
)

print("\nConverted Labels: Normal (1) | Anomaly (-1)")

# ===============================
# Step 4: Train the One-Class SVM Model
# ===============================

# One-Class SVM is a novelty detector: it learns the boundary of *normal*
# traffic, so it must be fit on benign rows only. Fitting on the mixed
# training set would teach it that attack traffic is normal too.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

# Kernel SVM fit time grows at least quadratically with the number of rows,
# so fitting on the full ~1.6M-row training set does not finish in practice.
# Train on a reproducible random sample of benign rows instead.
OCSVM_MAX_TRAIN_ROWS = 50_000
OCSVM_SEED = 42

benign_train = X_train.loc[y_train_binary == 1]
if benign_train.empty:
    raise ValueError(
        f"No rows labelled {normal_label!r} in the training set; "
        "cannot fit a One-Class SVM without normal samples."
    )
if len(benign_train) > OCSVM_MAX_TRAIN_ROWS:
    benign_train = benign_train.sample(n=OCSVM_MAX_TRAIN_ROWS, random_state=OCSVM_SEED)

# The RBF kernel is distance-based, so features are rescaled to a common range
# before the SVM sees them. Wrapping both steps in a pipeline keeps
# `ocsvm_model.predict(...)` and `ocsvm_model.decision_function(...)` usable on
# raw (unscaled) feature frames.
# nu is an upper bound on the fraction of training rows treated as outliers and
# a lower bound on the fraction of support vectors; with benign-only training
# data, nu=0.1 tolerates up to ~10% of benign training rows falling outside the
# learned boundary.
ocsvm_model = make_pipeline(
    MinMaxScaler(),
    OneClassSVM(kernel="rbf", nu=0.1, gamma="scale"),
)

print("Training One-Class SVM model...")
ocsvm_model.fit(benign_train)

# ===============================
# Step 5: Model Evaluation
# ===============================

# Model output on the held-out split: +1 = inlier (normal), -1 = outlier.
y_pred = ocsvm_model.predict(X_test)

# Human-readable class names for the report and the confusion matrix,
# derived from the signed (+1 / -1) predictions and ground truth.
y_pred_labels, y_test_labels = (
    np.where(signed == 1, "Benign", "Anomaly") for signed in (y_pred, y_test_binary)
)

# Accuracy is computed on the signed encoding for both splits.
test_acc = accuracy_score(y_test_binary, y_pred)
train_acc = accuracy_score(y_train_binary, ocsvm_model.predict(X_train))

print(f"\nTrain Accuracy: {train_acc:.4f}", f"Test Accuracy: {test_acc:.4f}", sep="\n")

# Per-class precision / recall / F1 on the test split.
print("\nClassification Report (Test Set):")
print(classification_report(y_true=y_test_labels, y_pred=y_pred_labels))

# ===============================
# Step 6: Confusion Matrix
# ===============================

# Generate Confusion Matrix
# confusion_matrix() orders string classes alphabetically ("Anomaly", "Benign")
# unless told otherwise. Pin the order explicitly and reuse the same list for
# the tick labels so each row and column of the heatmap carries the right name.
cm_labels = ["Benign", "Anomaly"]

plt.figure(figsize=(8, 6))
sns.heatmap(
    # Rows are true classes, columns are predicted classes, both in cm_labels order.
    confusion_matrix(y_test_labels, y_pred_labels, labels=cm_labels),
    annot=True,
    fmt="d",
    cmap="Reds",
    xticklabels=cm_labels,
    yticklabels=cm_labels
)
plt.title("Confusion Matrix - One-Class SVM Model", fontsize=14)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# ===============================
# Step 7: ROC-AUC Curve
# ===============================

# Continuous decision scores for the test split, used as the ranking signal.
y_pred_scores = ocsvm_model.decision_function(X_test)

# Ground truth as 0/1 with "Benign" as the positive class.
y_test_binary_roc = (y_test_labels == "Benign").astype(int)

# ROC operating points and the area under the curve.
auc = roc_auc_score(y_true=y_test_binary_roc, y_score=y_pred_scores)
fpr, tpr, _ = roc_curve(y_test_binary_roc, y_pred_scores)

# Draw the model's ROC curve against the chance diagonal.
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color="darkred", label=f"One-Class SVM (AUC: {auc:.2f})")
plt.plot([0, 1], [0, 1], "k--")  # chance-level reference
plt.title("ROC-AUC Curve - One-Class SVM")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.show()

# ===============================
# Final Results
# ===============================

# Recap of the headline metrics, one item per output line.
print(
    "\nFinal Evaluation Summary:",
    f"Train Accuracy: {train_acc:.4f}",
    f"Test Accuracy: {test_acc:.4f}",
    "Confusion matrix and ROC curve plotted.",
    sep="\n",
)

Train Set Shape: (1618172, 15), Labels: (1618172,)
Test Set Shape: (404543, 15), Labels: (404543,)

Converted Labels: Normal (1) | Anomaly (-1)
Training One-Class SVM model...


In [5]:
import pandas as pd

# Locations of the preprocessed splits (absolute, machine-specific paths).
train_path = r"C:\Users\S569652\Documents\INADS\data\train_set_fixed.csv"
test_path = r"C:\Users\S569652\Documents\INADS\data\test_set_fixed.csv"

# Read both CSVs into memory, train first, then test.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

# The 15 features retained for modelling, in the column order used downstream.
selected_features = [
    'Bwd Pkt Len Mean',
    'Flow IAT Mean',
    'Fwd Pkt Len Mean',
    'Flow IAT Std',
    'TotLen Fwd Pkts',
    'Flow Duration',
    'Bwd Pkts/s',
    'Flow Pkts/s',
    'Fwd IAT Std',
    'Flow Byts/s',
    'Fwd Pkt Len Max',
    'Flow IAT Max',
    'Init Fwd Win Byts',
    'Fwd Seg Size Min',
    'Dst Port',
]

# Restrict each frame to the selected features followed by the target column.
train_df, test_df = (
    frame[selected_features + ['Label']] for frame in (train_df, test_df)
)

print(f"Train Shape: {train_df.shape}, Test Shape: {test_df.shape}")

Train Shape: (1618172, 16), Test Shape: (404543, 16)


In [6]:
from sklearn.preprocessing import MinMaxScaler

# Split each frame into (features, target); "Label" is the target column.
X_train, y_train = train_df.drop(columns=["Label"]), train_df["Label"]
X_test, y_test = test_df.drop(columns=["Label"]), test_df["Label"]

# Learn per-feature min/max on the training split only, then apply the same
# transformation to both splits so the test data never influences the scaling.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Feature scaling complete.")

Feature scaling complete.


In [7]:
# Any class other than `normal_label` is treated as an anomaly.
normal_label = "Benign"

# Boolean match -> 0/1 -> scaled and shifted to -1/+1 (+1 = normal, -1 = anomaly).
y_train_binary = y_train.eq(normal_label).astype(int).mul(2).sub(1)
y_test_binary = y_test.eq(normal_label).astype(int).mul(2).sub(1)

print("Labels converted to binary format.")

Labels converted to binary format.


In [None]:
from sklearn.svm import OneClassSVM

# One-Class SVM is a novelty detector: it learns the boundary of *normal*
# traffic, so it must be fit on benign rows only. Fitting on the mixed
# training set would teach it that attack traffic is normal too.
import numpy as np

# Kernel SVM fit time grows at least quadratically with the number of rows,
# so fitting on the full ~1.6M-row training set does not finish in practice.
# Train on a reproducible random sample of benign rows instead.
OCSVM_MAX_TRAIN_ROWS = 50_000
OCSVM_SEED = 42

# y_train_binary uses +1 for normal rows; np.asarray makes the mask positional
# so it can index the scaled NumPy feature matrix directly.
benign_mask = np.asarray(y_train_binary) == 1
X_benign_scaled = X_train_scaled[benign_mask]
if X_benign_scaled.shape[0] == 0:
    raise ValueError(
        "No benign rows in the training set; "
        "cannot fit a One-Class SVM without normal samples."
    )
if X_benign_scaled.shape[0] > OCSVM_MAX_TRAIN_ROWS:
    sample_idx = np.random.default_rng(OCSVM_SEED).choice(
        X_benign_scaled.shape[0], size=OCSVM_MAX_TRAIN_ROWS, replace=False
    )
    X_benign_scaled = X_benign_scaled[sample_idx]

# Define One-Class SVM Model
# nu is an upper bound on the fraction of training rows treated as outliers and
# a lower bound on the fraction of support vectors; with benign-only training
# data, nu=0.1 tolerates up to ~10% of benign training rows falling outside the
# learned boundary.
ocsvm_model = OneClassSVM(kernel="rbf", nu=0.1, gamma="scale")

print(f"Training One-Class SVM on {X_benign_scaled.shape[0]} benign samples...")
ocsvm_model.fit(X_benign_scaled)
print("Model training complete.")

Training One-Class SVM on full dataset...
