# NN Multiclass

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from rich import print
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from pathlib import Path

In [2]:
def prepare_data_loaders(df, batch_size=64, test_size=0.3, random_state=42):
    """
    Build training and validation data loaders from a labelled DataFrame.

    The 'category' column is the classification target. The 'attack' column
    is dropped and every remaining column is used as a feature. Labels are
    integer-encoded, rows are split into train/validation parts, and features
    are standardised with statistics fitted on the training part only (so no
    validation information leaks into the scaler).

    Args:
        df: DataFrame holding the feature columns plus 'category' and
            'attack'. NOTE(review): the remaining columns are presumably all
            numeric, since they are fed straight to the scaler - confirm.
        batch_size: Batch size used by both loaders.
        test_size: Fraction of rows held out for validation.
        random_state: Seed passed to the train/validation split.

    Returns:
        dict with the keys:
            'train_loader':   shuffled DataLoader over the training split
            'val_loader':     unshuffled DataLoader over the validation split
            'val_dataset':    TensorDataset behind 'val_loader'
            'input_size':     number of feature columns
            'num_classes':    number of distinct values in df['category']
            'class_counts':   per-class sample counts of the training split,
                              always of length 'num_classes'
            'y_train_tensor': encoded training labels
            'classes':        original label values, indexed by encoded id
    """
    # Initialize preprocessing objects
    label_encoder = LabelEncoder()
    scaler = StandardScaler()

    # Prepare features and integer-encoded labels
    features = df.drop(['category', 'attack'], axis=1).values
    labels = label_encoder.fit_transform(df['category'])
    num_classes = len(label_encoder.classes_)

    # Split data
    x_train_data, x_val_data, y_train_data, y_val_data = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # Scale features: fit on the training split, reuse for validation
    x_train_scaled = scaler.fit_transform(x_train_data)
    x_val_scaled = scaler.transform(x_val_data)

    # Convert to PyTorch tensors
    x_train_tensor = torch.tensor(x_train_scaled, dtype=torch.float32)
    x_val_tensor = torch.tensor(x_val_scaled, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train_data, dtype=torch.long)
    y_val_tensor = torch.tensor(y_val_data, dtype=torch.long)

    # Create datasets and loaders
    train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Without minlength, bincount stops at the largest label that happens to
    # be in the training split, so a rare class that landed entirely in the
    # validation split would make the counts shorter than num_classes.
    class_counts = torch.bincount(y_train_tensor, minlength=num_classes)

    return {
        'train_loader': train_loader,
        'val_loader': val_loader,
        'val_dataset': val_dataset,
        'input_size': features.shape[1],
        'num_classes': num_classes,
        'class_counts': class_counts,
        'y_train_tensor': y_train_tensor,
        'classes': label_encoder.classes_
    }

In [3]:
class FeatureExtractor(nn.Module):
    """Input stem that projects the raw features to a 512-wide representation."""

    def __init__(self, input_size):
        super().__init__()
        hidden_width = 512
        stem = [
            nn.Linear(input_size, hidden_width),
            nn.BatchNorm1d(hidden_width),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.1),
        ]
        # Attribute name and layer order are kept so parameter names
        # ("layers.0.weight", ...) stay the same.
        self.layers = nn.Sequential(*stem)

    def forward(self, x):
        """Apply linear -> batch norm -> LeakyReLU -> dropout to ``x``."""
        projected = self.layers(x)
        return projected

class ResidualBlock(nn.Module):
    """
    Width-preserving block with an additive skip connection.

    The inner path is batch norm -> linear (size -> 2*size) -> LeakyReLU ->
    linear (2*size -> size) -> dropout. Its result is added to the block
    input and passed through a final LeakyReLU.
    """

    def __init__(self, size):
        super().__init__()
        expanded = 2 * size
        self.block = nn.Sequential(
            nn.BatchNorm1d(size),
            nn.Linear(size, expanded),
            nn.LeakyReLU(0.1),
            nn.Linear(expanded, size),
            nn.Dropout(0.1),
        )
        self.activation = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Return ``activation(block(x) + x)``."""
        transformed = self.block(x)
        # In-place add of the skip path; ``x`` itself is left untouched.
        transformed.add_(x)
        return self.activation(transformed)

class NeuralNetwork(nn.Module):
    """
    Residual MLP classifier.

    Data flows through a FeatureExtractor stem (input_size -> 512), three
    ResidualBlock(512) modules, a narrowing layer (512 -> 256 with batch
    norm, LeakyReLU and dropout) and a final linear layer that produces one
    raw score (logit) per class.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        # Stem that lifts the raw features to the 512-wide working dimension.
        self.feature_extractor = FeatureExtractor(input_size)

        # Residual trunk followed by the narrowing layer.
        residual_stack = [ResidualBlock(512) for _ in range(3)]
        self.main_path = nn.Sequential(
            *residual_stack,
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.1),
        )

        # Kept as a Sequential so parameter names stay "classifier.0.*".
        self.classifier = nn.Sequential(nn.Linear(256, num_classes))

        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every Linear and BatchNorm1d layer in the network."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm1d):
                # Start batch norm as an identity affine transform.
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue
            if not isinstance(module, nn.Linear):
                continue
            nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='leaky_relu')
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return the per-class logits for the batch ``x``."""
        return self.classifier(self.main_path(self.feature_extractor(x)))

def get_optimizer(model, learning_rate=0.001, weight_decay=1e-5):
    """
    Build the AdamW optimizer over all parameters of ``model``.

    Args:
        model: Module whose ``parameters()`` are optimised.
        learning_rate: Step size passed to AdamW as ``lr``.
        weight_decay: Weight-decay coefficient passed to AdamW.

    Returns:
        A ``torch.optim.AdamW`` instance with betas ``(0.9, 0.999)``.
    """
    adamw_settings = {
        'lr': learning_rate,
        'weight_decay': weight_decay,
        'betas': (0.9, 0.999),
    }
    return torch.optim.AdamW(model.parameters(), **adamw_settings)

def get_scheduler(optimizer):
    """
    Build the learning-rate scheduler for ``optimizer``.

    Returns a ``ReduceLROnPlateau`` in 'min' mode that multiplies the
    learning rate by 0.1 once the monitored value has not improved for more
    than 5 consecutive ``step`` calls.
    """
    plateau_cls = torch.optim.lr_scheduler.ReduceLROnPlateau
    return plateau_cls(optimizer, mode='min', factor=0.1, patience=5)

# Example of model initialization (to be used in the training loop):
def initialize_model(input_size, num_classes, device):
    """
    Create a fresh network on ``device`` together with its training helpers.

    Returns:
        Tuple ``(model, optimizer, scheduler)``. The optimizer and scheduler
        are built with the defaults of ``get_optimizer`` / ``get_scheduler``.
    """
    net = NeuralNetwork(input_size, num_classes)
    net = net.to(device)
    opt = get_optimizer(net)
    return net, opt, get_scheduler(opt)

In [None]:
def train_model(model, train_loader, val_loader, val_dataset, criterion, optimizer,
                scheduler, epochs, device, dataset_name, save_dir, goat):
    """
    Train the model with early stopping and checkpoint the best epoch.

    Each epoch makes one pass over ``train_loader`` and one over
    ``val_loader``. The mean per-batch validation loss drives ``scheduler``;
    validation accuracy decides which epoch is checkpointed and when
    training stops early.

    Args:
        model: Network to train. It is not moved here; only the batches are
            sent to ``device``.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        val_dataset: Dataset behind ``val_loader``; its length is the
            denominator of the accuracy.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Scheduler stepped once per epoch with the validation loss.
        epochs: Maximum number of epochs.
        device: Device the batches are moved to.
        dataset_name: Used in log lines and in the checkpoint file name.
        save_dir: Directory for ``best_model_<dataset_name>.pth``; created
            if it does not exist.
        goat: Best validation accuracy reached by earlier calls (0 if none).
            While it is non-zero and unbeaten, the early-stopping patience is
            5 epochs instead of 10.

    Returns:
        Tuple ``(best_accuracy, best_val_loss, goat)``. ``best_val_loss`` is
        the validation loss of the best-accuracy epoch (not the minimum loss
        seen) and stays ``inf`` if no epoch was checkpointed. ``goat`` is
        raised to ``best_accuracy`` if this run beat it.
    """
    best_val_loss = float('inf')
    best_accuracy = 0.0
    counter = 0  # consecutive epochs without an accuracy improvement

    # A run that follows an earlier record gets a shorter leash until it
    # beats that record (patience is restored to 10 below when it does).
    patience = 5 if goat != 0 else 10

    # torch.save does not create missing parent directories, so make sure
    # the checkpoint location exists before the first save.
    save_path = Path(save_dir) / f"best_model_{dataset_name}.pth"
    save_path.parent.mkdir(parents=True, exist_ok=True)

    for epoch in range(epochs):
        # Training phase
        model.train()
        epoch_loss = 0
        num_batches = len(train_loader)

        with tqdm(train_loader, desc=f"{dataset_name} - Epoch {epoch + 1}/{epochs}", bar_format='{desc}: {elapsed}') as progress_bar:
            for batch_X, batch_y in progress_bar:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)

                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()

                # Read the scalar once; each .item() call forces a
                # device-to-host synchronisation.
                batch_loss = loss.item()
                epoch_loss += batch_loss
                progress_bar.set_postfix(loss=batch_loss)

        # Validation phase
        model.eval()
        val_loss = 0
        correct = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct += (predicted == batch_y).sum().item()

        val_loss /= len(val_loader)
        accuracy = correct / len(val_dataset)

        # Learning rate scheduling
        scheduler.step(val_loss)

        # Save best model (selected by accuracy, not by loss)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_val_loss = val_loss
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_accuracy': best_accuracy,
                'best_val_loss': best_val_loss
            }, save_path)
            counter = 0

            # Beating the cross-run record restores the full patience.
            if best_accuracy > goat:
                goat = best_accuracy
                patience = 10
        else:
            counter += 1

        print(f"{dataset_name} - Epoch {epoch + 1}/{epochs} - "
              f"Train Loss: {epoch_loss / num_batches:.4f} - "
              f"Validation Loss: {val_loss:.4f} - "
              f"Accuracy: {accuracy:.4f} - "
              f"Best Accuracy: {best_accuracy:.4f}")

        if counter >= patience:
            print(f"No improvement for {patience} epochs. Early stopping...")
            break

    return best_accuracy, best_val_loss, goat

def evaluate_model(model, val_loader, classes, device):
    """
    Evaluate the model and print classification metrics.

    Runs ``model`` in eval mode over ``val_loader``, takes the arg-max class
    of each output row as the prediction, and prints a classification report
    followed by a confusion matrix.

    Args:
        model: Trained network; inputs are moved to ``device`` before the
            forward pass.
        val_loader: Loader yielding ``(inputs, labels)`` batches with
            integer-encoded labels.
        classes: Class names indexed by encoded label id. Its length fixes
            the set of labels that is reported.
        device: Device the input batches are moved to.

    Returns:
        None. Results are printed only.
    """
    y_pred = []
    y_true = []

    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs.to(device))
            _, predicted = torch.max(outputs, 1)
            y_pred.extend(predicted.cpu().tolist())
            y_true.extend(labels.tolist())

    y_pred = np.array(y_pred, dtype=np.int64)
    y_true = np.array(y_true, dtype=np.int64)

    # Pin the label set to every known class. Otherwise a class that is
    # absent from both y_true and y_pred makes the number of observed labels
    # differ from len(target_names) and classification_report raises.
    label_ids = np.arange(len(classes))
    class_names = [str(name) for name in classes]

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

    print("\nConfusion Matrix:")
    # Same fixed label set, so rows/columns always line up with ``classes``.
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    print(cm)

def train_on_multiple_datasets(dataset_paths, save_dir='/kaggle/working'):
    """
    Train and evaluate a fresh model on each dataset, one after another.

    For every CSV path a new network, optimizer and scheduler are created;
    no weights are carried over between datasets. The only state shared
    across datasets is ``goat``, the best validation accuracy seen so far,
    which ``train_model`` uses to adjust its early-stopping patience.

    Args:
        dataset_paths: Iterable of CSV file paths. The file stem is used as
            the dataset name in logs and checkpoint file names.
        save_dir: Directory for the ``best_model_<name>.pth`` checkpoints;
            created if it does not exist.

    Returns:
        None. Per-dataset metrics and a final summary are printed.
    """
    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Create save directory (a no-op when it already exists)
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    # Training configuration
    config = {
        'batch_size': 64,
        'epochs': 20,
        'learning_rate': 0.001,
        'weight_decay': 1e-5,
    }

    results = {}
    goat = 0.0

    # Train on each dataset sequentially
    for dataset_path in dataset_paths:
        dataset_name = Path(dataset_path).stem
        print(f"\nTraining on dataset: {dataset_name}")

        # Load and prepare data
        df = pd.read_csv(dataset_path, low_memory=False)
        data = prepare_data_loaders(df, batch_size=config['batch_size'])

        # Initialize model, optimizer, and criterion
        model = NeuralNetwork(data['input_size'], data['num_classes']).to(device)
        optimizer = get_optimizer(model, config['learning_rate'], config['weight_decay'])
        scheduler = get_scheduler(optimizer)

        # Inverse-frequency class weights: total / (num_classes * count).
        # Counts are floored at 1 so a class with no training samples gets a
        # large finite weight instead of an infinite one.
        class_counts = data['class_counts'].clamp(min=1)
        total_samples = len(data['y_train_tensor'])
        class_weights = total_samples / (len(class_counts) * class_counts)
        criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))

        # Train the model
        best_accuracy, best_val_loss, goat = train_model(
            model=model,
            train_loader=data['train_loader'],
            val_loader=data['val_loader'],
            val_dataset=data['val_dataset'],
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            epochs=config['epochs'],
            device=device,
            dataset_name=dataset_name,
            save_dir=save_dir,
            goat=goat
        )

        # Check if the best model file exists
        best_model_path = save_dir / f"best_model_{dataset_name}.pth"
        if not best_model_path.exists():
            print(f"Best model file for {dataset_name} not found. Skipping evaluation.")
            continue

        # Load best model for evaluation; map_location keeps the tensors on
        # the device the model currently lives on.
        checkpoint = torch.load(best_model_path, map_location=device, weights_only=True)
        model.load_state_dict(checkpoint['model_state_dict'])

        # Evaluate model
        print(f"\nEvaluating model for dataset: {dataset_name}")
        evaluate_model(model, data['val_loader'], data['classes'], device)

        # Store results
        results[dataset_name] = {
            'best_accuracy': best_accuracy,
            'best_val_loss': best_val_loss
        }

    # Print final results summary
    print("\nTraining Results Summary:")
    for dataset_name, metrics in results.items():
        print(f"\n{dataset_name}:")
        print(f"Best Accuracy: {metrics['best_accuracy']:.4f}")
        print(f"Best Validation Loss: {metrics['best_val_loss']:.4f}")

In [5]:
# CSV files to train on, processed in the order listed. The file stem of each
# path (e.g. "train_augmented") becomes the dataset name used in the log
# lines and in the checkpoint file name.
dataset_paths = [
    "/kaggle/input/dataset/train_augmented.csv",
    "/kaggle/input/dataset/train_sel_hclust.csv",
    "/kaggle/input/dataset/merged_train.csv"
]

# Run the full train/evaluate cycle; checkpoints go to the function's
# default save_dir.
train_on_multiple_datasets(dataset_paths)

train_augmented - Epoch 1/20: 00:09


train_augmented - Epoch 2/20: 00:08


train_augmented - Epoch 3/20: 00:08


train_augmented - Epoch 4/20: 00:08


train_augmented - Epoch 5/20: 00:08


train_augmented - Epoch 6/20: 00:08


train_augmented - Epoch 7/20: 00:09


train_augmented - Epoch 8/20: 00:09


train_augmented - Epoch 9/20: 00:08


train_augmented - Epoch 10/20: 00:08


train_augmented - Epoch 11/20: 00:09


train_augmented - Epoch 12/20: 00:08


train_augmented - Epoch 13/20: 00:09


train_augmented - Epoch 14/20: 00:08


train_augmented - Epoch 15/20: 00:08


train_augmented - Epoch 16/20: 00:09


train_augmented - Epoch 17/20: 00:08


train_augmented - Epoch 18/20: 00:08


train_augmented - Epoch 19/20: 00:09


train_augmented - Epoch 20/20: 00:08


  checkpoint = torch.load(best_model_path)


train_sel_hclust - Epoch 1/20: 01:16


train_sel_hclust - Epoch 2/20: 01:16


train_sel_hclust - Epoch 3/20: 01:15


train_sel_hclust - Epoch 4/20: 01:15


train_sel_hclust - Epoch 5/20: 01:16


train_sel_hclust - Epoch 6/20: 01:15


train_sel_hclust - Epoch 7/20: 01:16


train_sel_hclust - Epoch 8/20: 01:15


  checkpoint = torch.load(best_model_path)


merged_train - Epoch 1/20: 01:15


merged_train - Epoch 2/20: 01:14


merged_train - Epoch 3/20: 01:15


merged_train - Epoch 4/20: 01:15


merged_train - Epoch 5/20: 01:15


merged_train - Epoch 6/20: 01:16


  checkpoint = torch.load(best_model_path)
