In [2]:
import pandas as pd
import numpy as np
import json
import torch
import torch.nn as nn
import torch.optim as optim
from typing import List, Dict, Tuple, Any, Type, Union

In [3]:
import election_project as ep

Using MPS device (Apple Silicon GPU)


In [4]:
# =============================================================================
# Training defaults used by the neural-network models
# =============================================================================
BATCH_SIZE: int = 64
MAX_CV_EPOCHS: int = 30        # upper bound on epochs for a cross-validation fit
PATIENCE: int = 10             # early-stopping patience during cross-validation
FINAL_TRAIN_EPOCHS: int = 150  # epoch budget for the final training run
OPTIMIZER_CHOICE: Type[optim.Optimizer] = optim.AdamW  # optimizer class handed to the models

# =============================================================================
# Cross-validation search spaces
# =============================================================================
RIDGE_PARAM_GRID = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0]

# NOTE(review): MLP1_PARAM_GRID is assigned again, with different keys, in the
# later MLP1 cell before any visible use -- confirm which grid is intended.
MLP1_PARAM_GRID = {
    'n_hidden': [16, 64, 128],
    'dropout_rate': [0.1, 0.3, 0.5],
    'learning_rate': [1e-2, 1e-3, 1e-4],
    # weight_decay could be searched here as well if desired
}

MLP2_PARAM_GRID = {
    'shared_hidden_size': [16, 32, 64],
    'dropout_rate': [0.1, 0.3, 0.5],
    'learning_rate': [1e-2, 1e-3, 1e-4],
    # weight_decay could be searched here as well if desired
}

# =============================================================================
# XGBoost search space and constants
# =============================================================================
XGB_PARAM_GRID = {
    # --- searched parameters ---
    'learning_rate': [0.05, 0.1, 0.2],   # step-size shrinkage (eta)
    'max_depth': [5, 7],                 # maximum tree depth
    'subsample': [0.8, 1.0],             # fraction of samples drawn per tree
    'colsample_bytree': [0.8, 1.0],      # fraction of features drawn per tree
    'gamma': [0.1, 0.2],                 # minimum loss reduction to split (min_split_loss)
    'reg_alpha': [0, 0.1, 1.0],          # L1 regularization strength
    'reg_lambda': [0, 0.1, 1.0],         # L2 regularization strength
    # --- single-valued entries, held fixed for consistency ---
    'objective': ['reg:squarederror'],   # regression objective for each target
    'n_estimators': [200],               # upper bound; CV fits rely on early stopping
    'random_state': [42],                # fixed for reproducibility
}

XGB_EARLY_STOPPING_ROUNDS = 20  # early-stopping rounds for the CV fits

# Per-rung epoch targets and patience values for the MLP models
# (these two names are assigned again, with the same values, in the MLP1 cell).
RUNG_EPOCHS = list(range(25, 201, 25))    # 25, 50, ..., 200
RUNG_PATIENCE = list(range(15, 51, 5))    # 15, 20, ..., 50

In [5]:
# Seed every RNG-bearing library this notebook imports, so that a
# Restart & Run All starts from the same random state each time.
# The original cell seeded only torch; NumPy's global RNG was left unseeded.
# NOTE(review): whether `ep` draws from NumPy's global RNG is not visible
# here -- seeding it is harmless if it does not.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Build the project's data handler and (re)create its cross-validation and
# final-training DataLoaders with the notebook-wide batch size.
dh = ep.DataHandler()
dh.update_cv_dataloaders(batch_size=BATCH_SIZE)
dh.update_final_dataloader(batch_size=BATCH_SIZE)

DataHandler initialized - Using 114 features - Test year: 2020
Updated cross-validation DataLoaders with batch size 64.
Updated final training DataLoader with batch size 64.


In [9]:
# Search space for the MLP1 model. This assignment replaces the MLP1 grid
# defined in the configuration cell.
MLP1_PARAM_GRID = {
    # hidden-layer layout; each entry describes a single hidden layer's width
    'hidden_layers': [[16], [32], [64], [128]],
    # optimizer learning rate
    'learning_rate': [1e-2, 1e-3, 1e-4],
    # dropout regularization
    'dropout_rate': [0.0, 0.1, 0.2],
    # L2 regularization (AdamW style)
    'weight_decay': [0, 1e-5, 1e-3],
}

# Per-rung epoch targets (25, 50, ..., 200) and patience values (15, 20, ..., 50).
RUNG_EPOCHS = list(range(25, 201, 25))
RUNG_PATIENCE = list(range(15, 51, 5))

# One (target_epochs, patience) pair per rung.
MLP1_SCHEDULE = [
    (rung_epochs, rung_patience)
    for rung_epochs, rung_patience in zip(RUNG_EPOCHS, RUNG_PATIENCE)
]

# Model wrapper for MLP1
mlp1_model = ep.NNModel(model_name='mlp1')

NNModel initialized for 'mlp1'. CV results: ./results/mlp1_cv_results.csv


In [11]:
# Cross-validate MLP1 over its parameter grid using the per-rung
# (epochs, patience) schedule; the recorded run was interrupted manually.
mlp1_model.cross_validate(
    dh,
    param_grid=MLP1_PARAM_GRID,
    optimizer_choice=OPTIMIZER_CHOICE,
    rung_schedule=MLP1_SCHEDULE,
)



--- Starting SHA Cross-Validation for MLP1 (eta=3) ---
-------------------------------------------------------------------------------
>>> SHA Rung 1/8 | Target Epochs: 25 | Patience: 15 | Evaluating 108 configs <<<
-------------------------------------------------------------------------------
| Config | Last Epoch | Best Epoch | Train Loss |  Val Loss  | Time (seconds) |
|     1  |     23     |     15     |  0.838874  |  0.845222  |      27.59s    |
|     2  |     22     |     11     |  0.841238  |  0.843598  |      25.90s    |
|     3  |     25     |     19     |  0.840484  |  0.845393  |      29.50s    |
|     4  |     25     |     21     |  0.838757  |  0.845708  |      29.60s    |
|     5  |     25     |     19     |  0.838702  |  0.845632  |      29.88s    |
|     6  |     25     |     20     |  0.839764  |  0.846116  |      29.90s    |
|     7  |     25     |     24     |  0.844878  |  0.853489  |      30.43s    |
|     8  |     25     |     24     |  0.846943  |  0.854509  | 

KeyboardInterrupt: 

In [5]:
#make softmax model
softmax_model = ep.NNModel(model_name = 'Softmax')

# param grid and rung schedule for softmax model
SOFTMAX_PARAM_GRID = {'learning_rate': [1e-1, 1e-2, 1e-3, 1e-4],
                      'weight_decay': [0, 1e-5, 1e-4, 1e-3]
                    }
SOFTMAX_SCHEDULE = [(50, 20), (75, 25), (100, 30), (150, 40)]

NNModel initialized for 'Softmax'. CV results: ./results/Softmax_cv_results.csv


In [6]:
# Cross-validate the softmax model over its grid with its own rung schedule.
softmax_model.cross_validate(
    dh,
    param_grid=SOFTMAX_PARAM_GRID,
    optimizer_choice=OPTIMIZER_CHOICE,
    rung_schedule=SOFTMAX_SCHEDULE,
)


--- Starting SHA Cross-Validation for SOFTMAX (eta=3) ---
-------------------------------------------------------------------------------

>>> SHA Rung 1/4 | Target Epochs: 50 | Patience: 20 | Evaluating 16 configs <<<
-------------------------------------------------------------------------------
| Config | Last Epoch | Best Epoch | Train Loss |  Val Loss  | Time (seconds) |
|     1  |     38     |     21     |   1.09341  |   1.05917  |      43.09s    |
|     2  |     46     |     31     |  1.083709  |  1.058134  |      49.63s    |
|     3  |     22     |      2     |  1.229732  |  1.062608  |      23.81s    |
|     4  |     23     |      3     |  1.147415  |  1.058997  |      25.25s    |
|     5  |     38     |     18     |  0.841234  |  0.844526  |      41.64s    |
|     6  |     39     |     19     |   0.84157  |  0.843677  |      42.12s    |
|     7  |     38     |     19     |  0.841895  |  0.844437  |      41.00s    |
|     8  |     39     |     22     |  0.841344  |  0.844243 

Unnamed: 0,learning_rate,weight_decay,last_epoch,best_epoch,train_loss_at_best,best_val_loss
0,0.01,0.001,87,47,0.842698,0.843331
1,0.01,0.0001,92,53,0.840768,0.843501
2,0.01,1e-05,59,19,0.84157,0.843677


In [13]:
# Fit the final softmax model. Per the recorded output, the hyperparameters
# come from the stored CV results and training runs for up to
# FINAL_TRAIN_EPOCHS epochs.
softmax_model.train_final_model(
    dh,
    final_train_epochs=FINAL_TRAIN_EPOCHS,
    optimizer_choice=OPTIMIZER_CHOICE,
)


--- Starting Final Model Training for SOFTMAX ---
Using best hyperparameters from CV: {'learning_rate': 0.01, 'weight_decay': 0.0001, 'last_epoch': 77.66666666666667, 'best_epoch': 37.666666666666664, 'train_loss_at_best': 0.8412514337149682, 'best_val_loss': 0.8426987974822117}
Starting final training for up to 150 epochs (Patience: 50)...
  Epoch 10/150 - Loss: 0.843121 (Best Loss: 0.843121, Epochs No Improve: 0)
  Epoch 20/150 - Loss: 0.841399 (Best Loss: 0.841399, Epochs No Improve: 0)
  Epoch 30/150 - Loss: 0.841244 (Best Loss: 0.840897, Epochs No Improve: 3)
  Epoch 40/150 - Loss: 0.840943 (Best Loss: 0.840566, Epochs No Improve: 9)
  Epoch 50/150 - Loss: 0.843518 (Best Loss: 0.839810, Epochs No Improve: 1)
  Epoch 60/150 - Loss: 0.840637 (Best Loss: 0.839810, Epochs No Improve: 11)
  Epoch 70/150 - Loss: 0.841734 (Best Loss: 0.839807, Epochs No Improve: 1)
  Epoch 80/150 - Loss: 0.840093 (Best Loss: 0.839807, Epochs No Improve: 11)
  Epoch 90/150 - Loss: 0.841632 (Best Loss: 0.

Sequential(
  (0): Linear(in_features=114, out_features=4, bias=True)
  (1): Softmax(dim=1)
)

In [14]:
# Generate predictions with the trained softmax model; save=True also writes
# them to disk (the recorded run reports a CSV under ./preds/).
softmax_preds = softmax_model.predict(
    dh,
    save=True,
)


--- Generating Predictions for SOFTMAX on Year 2020 ---
County-level raw predictions saved to: ./preds/2020_Softmax_predictions.csv


In [15]:
# Display the predictions returned by predict() in the previous cell
# (the recorded output is a float32 array of shape (3090, 4)).
# NOTE(review): the rows shown do not sum to 1 even though the trained model
# printed above ends in Softmax(dim=1) -- confirm whether predict()
# post-processes or drops part of the network output.
softmax_preds

array([[0.15207319, 0.02530907, 0.2590983 , 0.32924837],
       [0.15103795, 0.06097275, 0.2786021 , 0.29776868],
       [0.20952702, 0.0151517 , 0.16357335, 0.4046892 ],
       ...,
       [0.11637519, 0.05959414, 0.2536456 , 0.2911093 ],
       [0.15302789, 0.04937229, 0.29364637, 0.2851832 ],
       [0.08838788, 0.12065135, 0.21881565, 0.3679239 ]],
      shape=(3090, 4), dtype=float32)