# Neural Network for Multiclass Classification

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from rich import print

In [None]:
# Read the training CSV into a DataFrame (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='dataset/train_sel_hclust.csv')

In [None]:
# Separate the inputs from the label columns: 'category' is the multiclass
# target, and all three label columns are removed from the feature matrix.
x_train = df.drop(['is_benign', 'category', 'attack'], axis=1)
y_train = df['category']

# Encode labels as integers
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)

# Split data into train and validation sets.
# The split and the scaling are done on NumPy arrays: torch.tensor() cannot
# ingest a DataFrame, and StandardScaler returns NumPy arrays, so converting
# to tensors is deferred until the data is fully preprocessed.
x_train_array, x_val_array, y_train_array, y_val_array = train_test_split(
    x_train.to_numpy(), y_train, test_size=0.3, random_state=42
)

# Normalize data; the scaler is fitted on the training split only so that no
# validation statistics leak into training.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_array)
x_val_scaled = scaler.transform(x_val_array)

# Convert to PyTorch tensors (float32 features, int64 class indices as
# required by nn.CrossEntropyLoss)
x_train_tensor = torch.tensor(x_train_scaled, dtype=torch.float32)
x_val_tensor = torch.tensor(x_val_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_array, dtype=torch.long)
y_val_tensor = torch.tensor(y_val_array, dtype=torch.long)

# Create DataLoader objects for batching
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# Validation metrics do not depend on sample order, so shuffling is unnecessary
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [None]:
# Define the model
class NeuralNet(nn.Module):
    """Feed-forward classifier with three hidden layers (232, 116, 58 units).

    Each hidden layer is followed by ReLU and Dropout(0.3); the final linear
    layer maps to ``num_classes`` outputs with no activation applied.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        # Build the stack in order so the Sequential indices (and therefore
        # the state_dict keys) are: Linear, ReLU, Dropout, ..., Linear.
        stack = []
        fan_in = input_size
        for width in (232, 116, 58):
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.ReLU())
            stack.append(nn.Dropout(0.3))
            fan_in = width
        stack.append(nn.Linear(fan_in, num_classes))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        logits = self.model(x)
        return logits

In [None]:
# Model, loss, and optimizer
input_size = x_train.shape[1]
num_classes = len(label_encoder.classes_)

model = NeuralNet(input_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()  # Set model to training mode (enables dropout)
    epoch_loss = 0.0
    num_train_samples = 0

    # Progress bar for training
    with tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}") as progress_bar:
        for batch_X, batch_y in progress_bar:
            optimizer.zero_grad()  # Clear gradients
            outputs = model(batch_X)  # Forward pass
            loss = criterion(outputs, batch_y)  # Mean loss over the batch
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            # Weight the batch mean by the batch size so that the (usually
            # smaller) final batch does not skew the epoch average.
            batch_loss = loss.item()
            batch_size = batch_y.size(0)
            epoch_loss += batch_loss * batch_size
            num_train_samples += batch_size
            progress_bar.set_postfix(loss=batch_loss)

    train_loss = epoch_loss / num_train_samples

    # Validation step
    model.eval()  # Set model to evaluation mode (disables dropout)
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            # Same per-sample weighting as for the training loss
            val_loss += loss.item() * batch_y.size(0)

            # Calculate accuracy: the predicted class is the highest-scoring output
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == batch_y).sum().item()

    val_loss /= len(val_dataset)
    accuracy = correct / len(val_dataset)

    print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {train_loss:.4f} - Validation Loss: {val_loss:.4f} - Accuracy: {accuracy:.4f}")


In [None]:
# Persist only the model's state_dict (not the module object itself)
torch.save(obj=model.state_dict(), f="nn_model.pth")