<a href="https://colab.research.google.com/github/grillinr/evolutionary-computing/blob/main/final/final_proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries and seed for easier checking

In [None]:
import random
import os
import argparse

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import accuracy_score, fbeta_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


SEED = 5173
device = torch.device("cpu")

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Define helper functions

In [52]:
def prepare_data(data, device):
    # Drop non-numeric and class columns
    data = data.dropna()
    X = data.drop(columns=["id", "record", "type"]).values.astype(np.float32)

    # Convert class to numeric value from 0-4
    y = data["type"].astype("category").cat.codes.values

    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(y, dtype=torch.long).to(device)

    return X_tensor, y_tensor


def evaluate(model, device, data, criterion):
    model.eval()

    X, y = prepare_data(data, device)

    with torch.no_grad():
        logits = model(X)
        loss = criterion(logits, y)
        probs = torch.softmax(logits, dim=1).cpu().numpy()

    y_true = y.cpu().numpy()
    y_pred = probs.argmax(axis=1)

    acc = accuracy_score(y_true, y_pred)
    f_beta_m = fbeta_score(y_true, y_pred, average="macro", beta=2, zero_division=0)
    return {
        "loss": loss.item(),
        "accuracy": acc,
        "f_beta_macro": f_beta_m,
    }

# Create Model Architecture (DNN)

In [5]:
class DNN(nn.Module):
    def __init__(self, input_size=32, hidden=(32, 16, 8), num_classes=5, dropout_rate=0.5):
        super().__init__()
        layers = []
        input_dim = input_size

        for h in hidden:
            layers.append(nn.Linear(input_dim, h))
            layers.append(nn.ReLU(inplace=True))
            layers.append(nn.Dropout(dropout_rate))
            input_dim = h

        layers.append(nn.Linear(input_dim, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

# Main Training loop

In [57]:
# Load data
dataset = pd.read_csv("/content/train.csv")
train_dataset, val_dataset = train_test_split(dataset, train_size=0.7, random_state=SEED)
X_train, y_train = prepare_data(train_dataset, device)
# test_dataset = pd.read_csv("test.csv")

In [58]:
# Configuration
lr = 1e-3
epochs = 1000
hidden = (32, 16, 8)
dropout_rate = 0.5
patience = 30

# Create model
model = DNN(hidden=hidden, dropout_rate=dropout_rate).to(device)

class_counts = train_dataset['type'].value_counts()
weights = 1.0 / class_counts.values
weights = torch.FloatTensor(weights).to(device)
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.Adam(model.parameters(), lr=lr)

In [61]:
# Training loop with early stopping
best_val_loss = float('inf')
patience_counter = 0

for epoch in range(1, epochs + 1):
    model.train()
    optimizer.zero_grad()
    out = model(X_train)
    loss = criterion(out, y_train)
    loss.backward()
    optimizer.step()
    train_loss = loss.item()

    train_metrics = evaluate(model, device, train_dataset, criterion)
    val_metrics = evaluate(model, device, val_dataset, criterion)

    if epoch % 10 == 0:
      print(
          f"Epoch {epoch}/{epochs} | "
          f"train_loss={train_loss:.4f} train_acc={train_metrics['accuracy']:.4f} "
          f"train_f1={train_metrics['f_beta_macro']:.4f} "
          f"val_loss={val_metrics['loss']:.4f} val_acc={val_metrics['accuracy']:.4f} "
          f"val_f1={val_metrics['f_beta_macro']:.4f} "
      )

    # Early stopping check
    if val_metrics['loss'] < best_val_loss:
        best_val_loss = val_metrics['loss']
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch}")
            break

Epoch 10/1000 | train_loss=1.6844 train_acc=0.0265 train_f1=0.0260 val_loss=1.6948 val_acc=0.0266 val_f1=0.0272 
Epoch 20/1000 | train_loss=1.5750 train_acc=0.0369 train_f1=0.0615 val_loss=1.5649 val_acc=0.0368 val_f1=0.0605 


KeyboardInterrupt: 

# Test output

In [None]:
X_test, y_test = prepare_data(test_dataset, device)

# Get predictions
with torch.no_grad():
    logits = model(X_test)
    probs = torch.softmax(logits, dim=1).cpu().numpy()
    predictions = probs.argmax(axis=1)

y_true = y_test.cpu().numpy()

# Calculate metrics
accuracy = accuracy_score(y_true, predictions)
f_beta = fbeta_score(y_true, predictions, average="macro", beta=2)

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test F-Beta (macro): {f_beta:.4f}")