# NN Multiclass

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from rich import print
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

In [3]:
# Load dataset (let's try with all the features)
df = pd.read_csv('dataset/merged_train.csv')

In [4]:
x_train = df.drop(['category', 'attack'], axis=1)
y_train = df['category']

# Encode labels as integers
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)

# Convert to PyTorch tensors
# First convert DataFrame to numpy array, then to tensor
x_tensor = torch.tensor(x_train.values, dtype=torch.float32)  # Add .values here
y_tensor = torch.tensor(y_train, dtype=torch.long)

# Split data into train and validation sets
x_train_tensor, x_val_tensor, y_train_tensor, y_val_tensor = train_test_split(
    x_tensor, y_tensor, test_size=0.3, random_state=42
)

# Convert tensors to numpy arrays before scaling
x_train_np = x_train_tensor.numpy()
x_val_np = x_val_tensor.numpy()

# Apply scaling
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_np)
x_val_scaled = scaler.transform(x_val_np)

# Convert back to tensors
x_train_tensor = torch.tensor(x_train_scaled, dtype=torch.float32)
x_val_tensor = torch.tensor(x_val_scaled, dtype=torch.float32)

# Now create the datasets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True)

In [5]:
class FeatureExtractor(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.1)
        )
        
    def forward(self, x):
        return self.layers(x)

class ResidualBlock(nn.Module):
    def __init__(self, size):
        super().__init__()
        self.block = nn.Sequential(
            nn.BatchNorm1d(size),
            nn.Linear(size, 2*size),
            nn.LeakyReLU(0.1),
            nn.Linear(2*size, size),
            nn.Dropout(0.1),
        )
        self.activation = nn.LeakyReLU(0.1)
        
    def forward(self, x):
        identity = x
        out = self.block(x)
        out += identity
        return self.activation(out)

class ImprovedNeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()
        
        # Feature extraction path
        self.feature_extractor = FeatureExtractor(input_size)
        
        # Main processing path with residual connections
        self.main_path = nn.Sequential(
            ResidualBlock(256),
            ResidualBlock(256),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.2)
        )
        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(128, num_classes)
        )
        
        # Initialize weights
        self._initialize_weights()
        
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        # Extract features
        features = self.feature_extractor(x)
        
        # Process through main path
        main_features = self.main_path(features)
       
        # Classification
        output = self.classifier(main_features)
        
        return output

def get_optimizer(model, learning_rate=0.001, weight_decay=1e-5):
    return torch.optim.AdamW(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay,
        betas=(0.9, 0.999)
    )

def get_scheduler(optimizer):
    return torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.1,
        patience=5
    )

In [6]:
# Model, loss, and optimizer
input_size = x_train.shape[1]
num_classes = len(label_encoder.classes_)

model = ImprovedNeuralNet(input_size, num_classes)
optimizer = get_optimizer(model)
scheduler = get_scheduler(optimizer)

# Calculate class weights
class_counts = torch.bincount(y_train_tensor)
total_samples = len(y_train_tensor)
class_weights = total_samples / (len(class_counts) * class_counts)
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Training loop
epochs = 20
best_val_loss = float('inf')
best_accuracy = 0.0
patience = 10  # Number of epochs to wait before early stopping
counter = 0  # Counter for patience

for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    num_batches = len(train_loader)

    with tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}") as progress_bar:
        for batch_X, batch_y in progress_bar:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())

    # Validation step
    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == batch_y).sum().item()

    val_loss /= len(val_loader)
    accuracy = correct / len(val_dataset)

    # Learning rate scheduling
    scheduler.step(val_loss)

    # Save model if it's the best so far (based on accuracy)
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_val_loss = val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'best_accuracy': best_accuracy,
            'best_val_loss': best_val_loss
        }, "best_model.pth")
        counter = 0  # Reset counter
    else:
        counter += 1  # Increment counter if accuracy didn't improve

    print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {epoch_loss / num_batches:.4f} - Validation Loss: {val_loss:.4f} - Accuracy: {accuracy:.4f} - Best Accuracy: {best_accuracy:.4f}")

    # If we've waited for patience epochs with no improvement, load best model and reduce learning rate
    if counter >= patience:
        print(f"No improvement for {patience} epochs. Loading best model...")
        exit()

Epoch 1/20: 100%|██████████| 13513/13513 [02:46<00:00, 81.18it/s, loss=0.222] 


Epoch 2/20: 100%|██████████| 13513/13513 [02:20<00:00, 96.07it/s, loss=0.243] 


Epoch 3/20: 100%|██████████| 13513/13513 [02:23<00:00, 94.07it/s, loss=0.312] 


Epoch 4/20: 100%|██████████| 13513/13513 [02:24<00:00, 93.40it/s, loss=0.497] 


Epoch 5/20: 100%|██████████| 13513/13513 [02:23<00:00, 94.29it/s, loss=0.346] 


Epoch 6/20: 100%|██████████| 13513/13513 [02:18<00:00, 97.36it/s, loss=0.725] 


Epoch 7/20: 100%|██████████| 13513/13513 [02:18<00:00, 97.35it/s, loss=0.355] 


Epoch 8/20: 100%|██████████| 13513/13513 [02:18<00:00, 97.87it/s, loss=0.206] 


Epoch 9/20: 100%|██████████| 13513/13513 [02:17<00:00, 98.02it/s, loss=0.363] 


Epoch 10/20: 100%|██████████| 13513/13513 [02:13<00:00, 101.11it/s, loss=0.545]


Epoch 11/20: 100%|██████████| 13513/13513 [02:12<00:00, 101.90it/s, loss=0.244]


Epoch 12/20: 100%|██████████| 13513/13513 [02:12<00:00, 101.74it/s, loss=0.186]


Epoch 13/20: 100%|██████████| 13513/13513 [02:12<00:00, 101.75it/s, loss=0.26] 


Epoch 14/20: 100%|██████████| 13513/13513 [02:09<00:00, 104.51it/s, loss=0.374]


Epoch 15/20: 100%|██████████| 13513/13513 [02:09<00:00, 104.12it/s, loss=0.318]


Epoch 16/20: 100%|██████████| 13513/13513 [02:09<00:00, 104.01it/s, loss=0.322]


Epoch 17/20: 100%|██████████| 13513/13513 [02:07<00:00, 106.14it/s, loss=0.253]


Epoch 18/20: 100%|██████████| 13513/13513 [01:56<00:00, 116.38it/s, loss=0.376]


Epoch 19/20: 100%|██████████| 13513/13513 [01:43<00:00, 130.29it/s, loss=0.262]


Epoch 20/20: 100%|██████████| 13513/13513 [02:05<00:00, 107.30it/s, loss=0.255]


In [7]:
# At the end, load the best model
checkpoint = torch.load("best_model.pth", weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [8]:
#  Make predictions on the validation set
y_pred = []
y_true = []

with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.cpu().numpy())

# Convert lists to numpy arrays
y_pred = np.array(y_pred)
y_true = np.array(y_true)

# Print classification report
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))

# Print confusion matrix
cm = confusion_matrix(y_true, y_pred)
cm

array([[ 50790,      0,      4,    142,    388,   6340],
       [     4, 140952,  34075,      2,      0,      0],
       [     0,  42410,  42898,      7,      0,      0],
       [    68,      0,      7,  31120,     47,    798],
       [   361,      0,      0,     10,  14448,   1024],
       [   187,      0,      0,     23,     20,   4515]])