In [29]:
import pandas as pd
import numpy as np
import json
import torch
import torch.nn as nn
import torch.optim as optim
import election_project as ep
from typing import List, Dict, Tuple, Any, Type, Union

In [27]:
# =============================================================================
# 1. Global Constants and Configuration
# =============================================================================

# --- Directory layout (all paths are relative to the current working directory) ---
DATA_DIR: str = "./data"
MODELS_DIR: str = "./models"
RESULTS_DIR: str = "./results"
LOGS_DIR: str = "./logs"
PREDS_DIR: str = "./preds"

# --- Device Selection ---
# Preference order: Apple MPS first, then CUDA, with CPU as the fallback.
# The CUDA probe only runs when MPS is not usable.
_mps_usable = torch.backends.mps.is_available() and torch.backends.mps.is_built()
if _mps_usable:
    _backend, _banner = "mps", "Using MPS device (Apple Silicon GPU)"
elif torch.cuda.is_available():
    _backend, _banner = "cuda", "Using CUDA device (NVIDIA GPU)"
else:
    _backend, _banner = "cpu", "Using CPU device"

# Build the device object once and report which backend was picked.
DEVICE = torch.device(_backend)
print(_banner)

# --- Training defaults ---
BATCH_SIZE: int = 64

# Cross-validation: upper bound on epochs, and the early-stopping patience.
MAX_CV_EPOCHS: int = 30
PATIENCE: int = 10

# Final training uses a fixed number of epochs.
FINAL_TRAIN_EPOCHS: int = 150

# Optimizer class used by default.
OPTIMIZER_CHOICE: Type[optim.Optimizer] = optim.AdamW

# --- Default Hyperparameter Grids for CV ---
# Ridge: a flat list of candidate values
# (presumably the regularization strength — confirm against the consumer).
RIDGE_PARAM_GRID = [1e-4, 1e-3, 1e-2, 1e-1, 1.0, 10.0]

# Softmax: learning rate and weight decay are searched.
SOFTMAX_PARAM_GRID = dict(
    learning_rate=[0.01, 0.001, 0.0001],
    weight_decay=[0, 0.00001, 0.001],
)

# MLP1: hidden width, dropout rate and learning rate are searched.
# Note: weight_decay could be added here too if desired.
MLP1_PARAM_GRID = dict(
    n_hidden=[16, 64, 128],
    dropout_rate=[0.1, 0.3, 0.5],
    learning_rate=[0.01, 0.001, 0.0001],
)

# MLP2: shared hidden size, dropout rate and learning rate are searched.
# Note: weight_decay could be added here too if desired.
MLP2_PARAM_GRID = dict(
    shared_hidden_size=[16, 32, 64],
    dropout_rate=[0.1, 0.3, 0.5],
    learning_rate=[0.01, 0.001, 0.0001],
)

# --- XGBoost Hyperparameter Grid and Constants ---
XGB_PARAM_GRID = dict(
    # Searched values
    learning_rate=[0.05, 0.1, 0.2],   # eta: step size shrinkage
    max_depth=[5, 7],                 # maximum depth of a tree
    subsample=[0.8, 1.0],             # fraction of samples used per tree
    colsample_bytree=[0.8, 1.0],      # fraction of features used per tree
    gamma=[0.1, 0.2],                 # min_split_loss: minimum loss reduction for a split
    reg_alpha=[0, 0.1, 1.0],          # L1 regularization
    reg_lambda=[0, 0.1, 1.0],         # L2 regularization
    # Single-valued entries, held fixed for consistency
    objective=['reg:squarederror'],   # regression objective for each target
    n_estimators=[200],               # high initial value; CV uses early stopping
    random_state=[42],                # for reproducibility
)

# Number of early-stopping rounds used for CV fits.
XGB_EARLY_STOPPING_ROUNDS = 20

# Per-rung schedules for the MLP models (not XGBoost). Both lists have eight entries;
# presumably entry i of each list belongs to the same rung — confirm against the consumer.
RUNG_EPOCHS = list(range(25, 201, 25))    # rung epochs: 25, 50, ..., 200
RUNG_PATIENCE = list(range(15, 51, 5))    # rung patience: 15, 20, ..., 50

Using MPS device (Apple Silicon GPU)


In [28]:
# Instantiate the project's DataHandler with its default (no-argument) constructor.
# NOTE(review): the output recorded for this cell is
#   AttributeError: 'DataHandler' object has no attribute 'data_csv_path'
# so this call currently fails. Presumably DataHandler.__init__ reads
# self.data_csv_path before it is assigned, or the kernel holds a stale copy of
# election_project — confirm there; the fix likely belongs in that module, not here.
dh = ep.DataHandler()

AttributeError: 'DataHandler' object has no attribute 'data_csv_path'