In [19]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
from sklearn.model_selection import train_test_split
import math

In [20]:
# Step 1: Load the CSV dataset
def load_data(csv_path, frac=0.1, random_state=42):
    """Read a CSV file and return a shuffled random subsample of its rows.

    Args:
        csv_path: Path (or any object accepted by ``pd.read_csv``) of the CSV file.
        frac: Fraction of rows to keep. The default of 0.1 keeps 10% of the
            rows; pass 1.0 to keep every row and only shuffle.
        random_state: Seed forwarded to ``DataFrame.sample`` so the same rows
            are selected on every run.

    Returns:
        A DataFrame holding the sampled rows in shuffled order. The original
        index labels are preserved (the index is not reset).
    """
    data = pd.read_csv(csv_path, low_memory=False)
    # NOTE: `sample` does more than shuffle - with frac < 1 it also discards
    # rows. The default keeps only 10% of the file.
    return data.sample(frac=frac, random_state=random_state)

In [21]:
# Step 2: Convert numerical features to grayscale images
def convert_to_image(data, save_dir):
    """Render every row of ``data`` as a square 8-bit grayscale PNG.

    The label column is the first column whose name contains "label", "class"
    or "attack" (case-insensitive). All other numeric columns are used as
    features. Each feature column is min-max scaled to [0, 1] over the rows of
    ``data`` before being mapped to 0..255, so large raw values no longer wrap
    around when cast to ``uint8``. NaN/inf entries and constant columns are
    rendered as 0. Rows are zero-padded up to the next perfect square and
    reshaped into an ``image_size x image_size`` picture.

    Args:
        data: DataFrame containing the numeric feature columns and a label column.
        save_dir: Directory that receives ``img_<i>.png`` files; created if missing.

    Returns:
        ``(images, labels)``: a list of the written file paths and a list with
        the label value of each row, both in the row order of ``data``.

    Raises:
        ValueError: If no label column is found, or if ``data`` has no numeric
            feature columns.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Identify the label column dynamically: first column name that matches.
    label_col = next(
        (
            col
            for col in data.columns
            if any(key in str(col).lower() for key in ("label", "class", "attack"))
        ),
        None,
    )
    if label_col is None:
        raise ValueError("No label column found in dataset. Check column names!")

    # Feature columns are all numeric columns except the label.
    feature_columns = [
        col
        for col in data.select_dtypes(include=[np.number]).columns
        if col != label_col
    ]

    print(f"Using label column: {label_col}")  # Debugging output

    num_features = len(feature_columns)
    if num_features == 0:
        raise ValueError("No numeric feature columns found in dataset.")
    image_size = math.ceil(math.sqrt(num_features))  # Side of the next perfect square

    values = data[feature_columns].to_numpy(dtype=np.float64, na_value=np.nan)
    finite = np.isfinite(values)

    # Per-column min/max over finite entries only. `initial` keeps the
    # reduction defined when `data` has zero rows.
    col_min = np.where(finite, values, np.inf).min(axis=0, initial=np.inf)
    col_max = np.where(finite, values, -np.inf).max(axis=0, initial=-np.inf)
    with np.errstate(invalid="ignore", over="ignore"):
        col_span = col_max - col_min
    # False for constant columns (span 0) and for columns with no finite value.
    scalable = col_span > 0

    safe_values = np.where(finite, values, 0.0)
    safe_min = np.where(scalable, col_min, 0.0)
    safe_span = np.where(scalable, col_span, 1.0)
    scaled = np.where(finite & scalable, (safe_values - safe_min) / safe_span, 0.0)

    # Clip before the cast: casting out-of-range floats to uint8 wraps around.
    pixels = np.rint(np.clip(scaled, 0.0, 1.0) * 255).astype(np.uint8)
    pixels = np.pad(pixels, ((0, 0), (0, image_size**2 - num_features)), 'constant')
    pixels = pixels.reshape(-1, image_size, image_size)

    # NOTE(review): min/max are computed over all rows passed in, i.e. before
    # any train/test split - confirm this is acceptable for the experiment.
    label_values = data[label_col].tolist()

    images, labels = [], []
    for i, label in enumerate(label_values):
        path = f"{save_dir}/img_{i}.png"
        Image.fromarray(pixels[i], mode='L').save(path)
        images.append(path)
        labels.append(label)  # Use dynamically detected label column

    return images, labels

In [22]:
# Step 3: Create a custom PyTorch Dataset class
class DDoSDataset(Dataset):
    """Dataset that pairs image files on disk with their class labels.

    Attributes are stored exactly as passed in:
        image_paths: sequence of image file paths.
        labels: sequence of labels, index-aligned with ``image_paths``.
        transform: optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths, self.labels = image_paths, labels
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from the length of ``image_paths``."""
        n_samples = len(self.image_paths)
        return n_samples

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label_tensor)``.

        The image is opened and converted to RGB; the transform, when truthy,
        is applied to it. The label is wrapped in a ``torch.long`` tensor.
        """
        rgb = Image.open(self.image_paths[idx]).convert("RGB")
        target = self.labels[idx]
        sample = self.transform(rgb) if self.transform else rgb
        return sample, torch.tensor(target, dtype=torch.long)


In [23]:
# Step 4: Define data transformations
# Pipeline: resize to 224x224, convert to a tensor, then normalise each of the
# three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [None]:
# Step 5: Load data, convert to images
# The CSV location can be overridden with the DDOS_CSV_PATH environment
# variable so the notebook is not tied to one machine; the original path is
# kept as the fallback.
csv_path = os.environ.get(
    "DDOS_CSV_PATH", "/Users/book_kuno/Downloads/DDoS 2018/02-21-2018.csv"
)
data = load_data(csv_path)
image_paths, labels = convert_to_image(data, "./ddos_images")

In [None]:
# Step 6: Split the samples and create DataLoaders
# The train/val/test lists used below were never defined anywhere in the
# notebook, so this cell failed with NameError on a fresh kernel. Derive them
# here from `image_paths` / `labels`: 70% train, 15% validation, 15% test,
# with a fixed seed for reproducibility.
# NOTE(review): the split is not stratified; consider `stratify=labels` if the
# classes are imbalanced and every class has enough samples.
# NOTE(review): DDoSDataset wraps each label in a torch.long tensor, so the
# labels are presumably expected to be integer class ids - confirm the label
# column is already numeric.
train_imgs, holdout_imgs, train_labels, holdout_labels = train_test_split(
    image_paths, labels, test_size=0.3, random_state=42
)
val_imgs, test_imgs, val_labels, test_labels = train_test_split(
    holdout_imgs, holdout_labels, test_size=0.5, random_state=42
)

train_dataset = DDoSDataset(train_imgs, train_labels, transform)
val_dataset = DDoSDataset(val_imgs, val_labels, transform)
test_dataset = DDoSDataset(test_imgs, test_labels, transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Step 7: Load pre-trained ResNet-18 and modify it for DDoS detection
# torchvision >= 0.13 deprecates `pretrained=True` in favour of the `weights=`
# argument. IMAGENET1K_V1 is the checkpoint that `pretrained=True` selects, so
# both branches load the same weights; the fallback keeps older torchvision
# releases working.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
# Replace the final fully connected layer with a 2-output head.
model.fc = nn.Linear(num_features, 2)  # Binary classification
model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [None]:
# Step 8: Define loss function and optimizer
# Adam over all model parameters with a learning rate of 1e-3, and
# cross-entropy as the training criterion.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
# Step 9: Train the model
def _run_validation(model, val_loader, criterion, device):
    """Return (mean batch loss, accuracy in percent) of ``model`` on ``val_loader``."""
    model.eval()
    loss_sum, batches, correct, seen = 0.0, 0, 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)
            batches += 1
    if batches == 0 or seen == 0:
        return float("nan"), float("nan")
    return loss_sum / batches, 100 * correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and report training and validation metrics per epoch.

    Args:
        model: Network to optimise; updated in place.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches, or
            ``None`` to skip validation.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object, left in training mode.
    """
    # Use the device the model already lives on so inputs always match it;
    # fall back to the CUDA-if-available guess for parameterless models.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for epoch in range(epochs):
        model.train()  # validation below switches to eval mode each epoch
        total_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        # Guard against an empty loader instead of raising ZeroDivisionError.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        message = f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss}"
        if val_loader is not None:
            val_loss, val_acc = _run_validation(model, val_loader, criterion, device)
            message += f", Val Loss: {val_loss}, Val Acc: {val_acc:.2f}%"
        print(message)

    model.train()
    return model

# Train for 10 epochs and rebind `model` to the value returned by train_model.
model = train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10)


In [None]:
# Step 10: Evaluate on test data
def evaluate_model(model, test_loader):
    """Compute, print and return the classification accuracy of ``model``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Accuracy in percent as a float, or ``nan`` when ``test_loader``
        yields no samples.
    """
    # Resolve the device once, from the model itself, instead of rebuilding it
    # twice per batch; fall back to the CUDA-if-available guess when the model
    # has no parameters.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader used to raise ZeroDivisionError here.
    if total == 0:
        print("Test Accuracy: N/A (test_loader produced no samples)")
        return float("nan")

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Evaluate `model` on `test_loader`; evaluate_model prints the test accuracy.
evaluate_model(model, test_loader)