<a href="https://colab.research.google.com/github/grillinr/evolutionary-computing/blob/main/final/final_proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries and seed for easier checking

In [71]:
import random
import os
import argparse
import math
from typing import List, Tuple


import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import accuracy_score, fbeta_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


SEED = 5173
device = torch.device("cpu") if not torch.cuda.is_available() else torch.device("cuda")
print(device)

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

cuda


# Define helper functions

In [44]:
def prepare_data_with_scaler(data, scaler=None, fit=False):
    data = data.dropna()
    X = data.drop(columns=["id", "record", "type"]).values.astype(np.float32)
    y = data["type"].astype("category").cat.codes.values

    if fit:
        X = scaler.fit_transform(X)
    else:
        X = scaler.transform(X)

    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.long)


def evaluate(model, X, y, criterion):
    model.eval()
    with torch.no_grad():
        logits = model(X)
        loss = criterion(logits, y)
        y_pred = logits.argmax(dim=1).cpu().numpy()

    y_true = y.cpu().numpy()
    return {
        "loss": loss.item(),
        "accuracy": accuracy_score(y_true, y_pred),
        "f_beta_macro": fbeta_score(y_true, y_pred, average="macro", beta=2, zero_division=0)
    }


def estimate_flops(model, input_shape):
    """
    Estimate FLOPs for Linear and Conv2d layers only.
    Args:
        model (nn.Module): PyTorch model
        input_shape (tuple): shape of one input sample, e.g., (1, 3, 224, 224) or (1, input_dim)
    Returns:
        total_flops (int)
    """
    flops = 0

    def count_layer(layer, x_in, x_out):
        nonlocal flops
        # Conv2d FLOPs = Kx * Ky * Cin * Cout * Hout * Wout
        if isinstance(layer, nn.Conv2d):
            out_h, out_w = x_out.shape[2:]
            kernel_ops = layer.kernel_size[0] * layer.kernel_size[1]
            flops += kernel_ops * layer.in_channels * layer.out_channels * out_h * out_w
        # Linear FLOPs = input_features * output_features
        elif isinstance(layer, nn.Linear):
            flops += layer.in_features * layer.out_features

    hooks = []
    for layer in model.modules():
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            hooks.append(layer.register_forward_hook(count_layer))

    dummy = torch.randn(input_shape).to(next(model.parameters()).device)
    with torch.no_grad():
        model(dummy)

    for h in hooks:
        h.remove()

    return flops

# Create Model Architecture (DNN)

In [46]:
class DNN(nn.Module):
    def __init__(self, input_size=32, hidden=(32, 16, 8), num_classes=5, dropout_rate=0.5):
        super().__init__()
        layers = []
        input_dim = input_size

        for h in hidden:
            layers.append(nn.Linear(input_dim, h))
            layers.append(nn.ReLU(inplace=True))
            layers.append(nn.Dropout(dropout_rate))
            input_dim = h

        layers.append(nn.Linear(input_dim, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

# Main Training loop

In [47]:
# Load data
dataset = pd.read_csv("/content/train.csv")
train_dataset, val_dataset = train_test_split(dataset, train_size=0.7, random_state=SEED)
scaler = StandardScaler()
X_train, y_train = prepare_data_with_scaler(train_dataset, scaler, fit=True)
X_val, y_val = prepare_data_with_scaler(val_dataset, scaler, fit=False)

X_train, y_train = X_train.to(device), y_train.to(device)
X_val, y_val = X_val.to(device), y_val.to(device)

In [58]:
# Configuration
class Hyperparameters:
    def __init__(self, lr, epochs, hidden, dropout_rate, patience):
        self.lr = lr
        self.epochs = epochs
        self.hidden = hidden
        self.dropout_rate = dropout_rate
        self.patience = patience

In [63]:
def train(params: Hyperparameters):
  # Create model
  model = DNN(hidden=params.hidden, dropout_rate=params.dropout_rate).to(device)

  class_counts = train_dataset['type'].value_counts()
  weights = 1.0 / class_counts.values
  weights = torch.FloatTensor(weights).to(device)
  criterion = nn.CrossEntropyLoss(weight=weights)
  optimizer = optim.Adam(model.parameters(), lr=params.lr)

  # Training loop with early stopping
  best_val_loss = float('inf')
  patience_counter = 0
  epochs_run = params.epochs
  for epoch in range(1, params.epochs + 1):
      model.train()
      optimizer.zero_grad()
      out = model(X_train)
      loss = criterion(out, y_train)
      loss.backward()
      optimizer.step()
      train_loss = loss.item()

      train_metrics = evaluate(model, X_train, y_train, criterion)
      val_metrics = evaluate(model, X_val, y_val, criterion)

      if epoch % (params.epochs // 10) == 0:
        print(
            f"Epoch {epoch}/{params.epochs} | "
            f"train_loss={train_loss:.4f} train_acc={train_metrics['accuracy']:.4f} "
            f"train_f1={train_metrics['f_beta_macro']:.4f} "
            f"val_loss={val_metrics['loss']:.4f} val_acc={val_metrics['accuracy']:.4f} "
            f"val_f1={val_metrics['f_beta_macro']:.4f} "
        )

      # Early stopping check
      if val_metrics['loss'] < best_val_loss:
          best_val_loss = val_metrics['loss']
          patience_counter = 0
      else:
          patience_counter += 1
          if patience_counter >= params.patience:
              print(f"Early stopping at epoch {epoch}")
              epochs_run = epoch
              break

  results = {"epochs_run": epochs_run,
            "val_loss": val_metrics["loss"],
            "val_accuracy": val_metrics["accuracy"],
            "val_f1": val_metrics["f_beta_macro"],
            "flops": estimate_flops(model, (1, 32))
            }

  return results

In [70]:
# Test the function
hyperparameters = Hyperparameters(lr=1e-3, epochs=500, hidden=(64, 32, 16, 8), dropout_rate=0.5, patience=100)
result = train(hyperparameters)

Epoch 50/500 | train_loss=1.0105 train_acc=0.0677 train_f1=0.0533 val_loss=0.6583 val_acc=0.0678 val_f1=0.0533 
Epoch 100/500 | train_loss=0.7896 train_acc=0.4054 train_f1=0.1579 val_loss=0.5629 val_acc=0.4034 val_f1=0.1574 
Epoch 150/500 | train_loss=0.6818 train_acc=0.6302 train_f1=0.2302 val_loss=0.4893 val_acc=0.6285 val_f1=0.2298 
Epoch 200/500 | train_loss=0.6099 train_acc=0.7641 train_f1=0.2799 val_loss=0.4391 val_acc=0.7640 val_f1=0.2800 
Epoch 250/500 | train_loss=0.5695 train_acc=0.8497 train_f1=0.3178 val_loss=0.4081 val_acc=0.8482 val_f1=0.3171 
Epoch 300/500 | train_loss=0.5248 train_acc=0.9132 train_f1=0.3509 val_loss=0.3724 val_acc=0.9118 val_f1=0.3500 
Epoch 350/500 | train_loss=0.4618 train_acc=0.9339 train_f1=0.3633 val_loss=0.3033 val_acc=0.9324 val_f1=0.3622 
Epoch 400/500 | train_loss=0.4270 train_acc=0.9327 train_f1=0.3628 val_loss=0.2702 val_acc=0.9315 val_f1=0.3618 
Epoch 450/500 | train_loss=0.4008 train_acc=0.9336 train_f1=0.3632 val_loss=0.2523 val_acc=0.9324

In [67]:
print(result)

{'epochs_run': 500, 'val_loss': 0.20437687635421753, 'val_accuracy': 0.9285204469665945, 'val_f1': 0.35985153089835187, 'flops': 4776}


# Neuroevolution

In [72]:
def init_population(pop_size: int, mem_size: int, mem_range: Tuple[float, float], sigma: float, rng: random.Random) -> List[List[float]]:
    population = []
    for _ in range(pop_size):
        member = []
        for _ in range(mem_size):
            gene = rng.uniform(mem_range[0], mem_range[1])
            member.append(gene)
        member.append(sigma)
        population.append(member)
    return population

def evolution_strategy(fitness_fn, mu: int, lambda_: int, mem_size: int, mem_range: Tuple[float, float], sigma: float, tau: float, max_gens: int, rng: random.Random) -> List[List[float]]:
    population = init_population(mu, mem_size, mem_range, sigma, rng)
    cumulative_evals = 0

    for generation_number in range(1, max_gens + 1):
        fitnesses = [fitness_fn.fitness(member[:mem_size]) for member in population]
        cumulative_evals += mu

        offspring = []
        for _ in range(lambda_):
            # Select parent using tournament
            candidates = rng.sample(range(mu), 2)
            parent_idx = max(candidates, key=lambda i: fitnesses[i])
            parent = population[parent_idx]

            child = []
            genes = parent[:mem_size]
            sigma_val = parent[mem_size]
            for gene in genes:
                mutation = rng.gauss(0.0, sigma_val)
                mutated_gene = gene + mutation
                child.append(mutated_gene)
            # Mutate sigma
            sigma_mutation = rng.gauss(0.0, 1.0)
            new_sigma = sigma_val * math.exp(tau * sigma_mutation)
            child.append(new_sigma)
            offspring.append(child)

        offspring_fitnesses = [fitness_fn.fitness(member[:mem_size]) for member in offspring]
        cumulative_evals += lambda_

        max_fitness = max(fitnesses)
        average = sum(fitnesses) / mu
        diversity = 0.0
        for i in range(mu):
            for j in range(i + 1, mu):
                dist = math.sqrt(sum((population[i][k] - population[j][k])**2 for k in range(mem_size)))
                if dist > diversity:
                    diversity = dist
        print(f"Himmelblau ES {mu} {lambda_} {tau} 0.0 {generation_number} {cumulative_evals} {max_fitness} {average} {diversity}")

        if average > 0.99:
            break

        # Select best mu from offspring
        indexed = [(f, i) for i, f in enumerate(offspring_fitnesses)]
        indexed.sort(key=lambda x: x[0], reverse=True)
        selected_indices = [i for _, i in indexed[:mu]]

        new_population = [offspring[idx] for idx in selected_indices]
        population = new_population

    return population


# Test output

In [None]:
X_test, y_test = prepare_data(test_dataset, device)

# Get predictions
with torch.no_grad():
    logits = model(X_test)
    probs = torch.softmax(logits, dim=1).cpu().numpy()
    predictions = probs.argmax(axis=1)

y_true = y_test.cpu().numpy()

# Calculate metrics
accuracy = accuracy_score(y_true, predictions)
f_beta = fbeta_score(y_true, predictions, average="macro", beta=2)

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test F-Beta (macro): {f_beta:.4f}")