In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nf-ton-iotv1/NetFlow_v1_Features.csv
/kaggle/input/nf-ton-iotv1/NF-ToN-IoT.csv


In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import matplotlib.pyplot as plt

In [3]:
# Load dataset
df = pd.read_csv("/kaggle/input/nf-ton-iotv1/NF-ToN-IoT.csv")
df = df.drop(columns=['IPV4_SRC_ADDR', 'IPV4_DST_ADDR'], errors='ignore')

X = df.drop(columns=['Attack', 'Label'], errors='ignore').values
y = df['Attack'].values

# Binary encode labels: Benign=0, Attack=1
binary_y = np.where(y == 'Benign', 0, 1)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, binary_y, test_size=0.3, random_state=42)

# Normalize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train only on benign samples
X_train_benign = X_train[y_train == 0]

# Convert to tensors
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train_tensor = torch.tensor(X_train_benign, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)

In [4]:
# Define SVDD network
class SVDDNet(nn.Module):
    def __init__(self, input_dim, embedding_dim=32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, embedding_dim)
        )

    def forward(self, x):
        return self.net(x)

model = SVDDNet(input_dim=X_train.shape[1]).to(device)

In [5]:
# Initialize SVDD center using benign embeddings
with torch.no_grad():
    c = model(X_train_tensor).mean(dim=0)

# Loss function
def svdd_loss(output, center):
    return torch.mean(torch.sum((output - center) ** 2, dim=1))

In [6]:
# Training
optimizer = optim.Adam(model.parameters(), lr=1e-3)
model.train()
num_epochs = 30
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = torch.mean(torch.sum((outputs - c) ** 2, dim=1)) + 1e-4 * torch.norm(outputs, p=2)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

Epoch 1/30, Loss: 0.5175
Epoch 2/30, Loss: 0.4374
Epoch 3/30, Loss: 0.3740
Epoch 4/30, Loss: 0.3231
Epoch 5/30, Loss: 0.2817
Epoch 6/30, Loss: 0.2476
Epoch 7/30, Loss: 0.2195
Epoch 8/30, Loss: 0.1956
Epoch 9/30, Loss: 0.1754
Epoch 10/30, Loss: 0.1584
Epoch 11/30, Loss: 0.1438
Epoch 12/30, Loss: 0.1317
Epoch 13/30, Loss: 0.1213
Epoch 14/30, Loss: 0.1125
Epoch 15/30, Loss: 0.1052
Epoch 16/30, Loss: 0.0989
Epoch 17/30, Loss: 0.0935
Epoch 18/30, Loss: 0.0887
Epoch 19/30, Loss: 0.0845
Epoch 20/30, Loss: 0.0807
Epoch 21/30, Loss: 0.0773
Epoch 22/30, Loss: 0.0742
Epoch 23/30, Loss: 0.0714
Epoch 24/30, Loss: 0.0690
Epoch 25/30, Loss: 0.0668
Epoch 26/30, Loss: 0.0648
Epoch 27/30, Loss: 0.0630
Epoch 28/30, Loss: 0.0613
Epoch 29/30, Loss: 0.0598
Epoch 30/30, Loss: 0.0584


In [7]:
# Evaluation
model.eval()
with torch.no_grad():
    test_embeddings = model(X_test_tensor)
    dists = torch.sum((test_embeddings - c) ** 2, dim=1).cpu().numpy()

y_true = y_test

In [8]:
# Replace linear spacing with log spacing
thresholds = np.logspace(-8, -2, 100)

best_f1 = 0
best_thresh = 0

for thresh in thresholds:
    y_pred = (dists > thresh).astype(int)
    f1 = f1_score(y_true, y_pred)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = thresh

print(f"\nBest threshold: {best_thresh:.6f}, Best F1: {best_f1:.4f}")



Best threshold: 0.010000, Best F1: 0.9507


In [9]:
# Final prediction
y_pred = (dists > best_thresh).astype(int)
print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred))
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=['Benign', 'Attack']))


Confusion Matrix:
[[ 46626  34488]
 [     0 332669]]

Classification Report:
              precision    recall  f1-score   support

      Benign       1.00      0.57      0.73     81114
      Attack       0.91      1.00      0.95    332669

    accuracy                           0.92    413783
   macro avg       0.95      0.79      0.84    413783
weighted avg       0.92      0.92      0.91    413783

