# DAEN 429 Midterm Bonus
### Fletcher Newman | October 14th, 2025
This is the clean file containing only the neural network definition, the training code, and the `compute_loss()` function.

Place `test.csv` in the same working directory as this notebook to compute the MSE loss on the test set.

Regularization used:
- Dropout (each hidden activation is zeroed with probability 0.3 during training)
- Early stopping (patience of 10 epochs, monitored on the training loss)

My work in testing different models and going through a small amount of EDA is shown in the `midterm_model_scratchWork.ipynb` file

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.preprocessing import StandardScaler
import os
from collections import OrderedDict

In [2]:
# Load the training and validation splits shipped with the assignment
train_df, val_df = (
    pd.read_csv(csv_name)
    for csv_name in ('BonusQuestionTrain.csv', 'BonusQuestionVal.csv')
)

# The test split is optional: it is only read when test.csv is present in
# the working directory, otherwise `test` stays None
test = pd.read_csv('test.csv') if os.path.isfile('test.csv') else None
if test is None:
    print('test.csv not in directory, defaulting to Null')

test.csv not in directory, defaulting to Null


# Defining the model

In [3]:
# Define Multi-Layer Neural Network with Regularization
class NeuralNetwork(nn.Module):
    """
    Multi-layer neural network for non-linear regression.

    Architecture (with the default ``hidden_sizes=(256, 128, 32)``):
    - Input layer: ``input_size`` features (10 by default)
    - Hidden layer 1: 256 neurons + BatchNorm + ReLU + Dropout
    - Hidden layer 2: 128 neurons + BatchNorm + ReLU + Dropout
    - Hidden layer 3: 32 neurons + BatchNorm + ReLU + Dropout
    - Output layer: 1 neuron (regression output, no activation)

    Regularization techniques:
    - Dropout: zeros each hidden activation with probability
      ``dropout_rate`` while the module is in training mode
    - Batch Normalization: normalizes each hidden layer's pre-activations

    Args:
        input_size: Number of input features.
        dropout_rate: Dropout probability applied after every hidden layer.
        hidden_sizes: Widths of the three hidden layers, in order.

    Raises:
        ValueError: If ``hidden_sizes`` does not contain exactly three widths.
    """
    def __init__(self, input_size=10, dropout_rate=0.3, hidden_sizes=(256, 128, 32)):
        super().__init__()

        if len(hidden_sizes) != 3:
            raise ValueError(
                f"hidden_sizes must contain exactly 3 widths, got {len(hidden_sizes)}"
            )
        h1, h2, h3 = hidden_sizes

        # Sub-modules keep their original names and creation order so that
        # existing state dicts still load and weight initialization consumes
        # the random number generator in the same sequence as before.

        # Layer 1
        self.fc1 = nn.Linear(input_size, h1)
        self.bn1 = nn.BatchNorm1d(h1)
        self.dropout1 = nn.Dropout(dropout_rate)

        # Layer 2
        self.fc2 = nn.Linear(h1, h2)
        self.bn2 = nn.BatchNorm1d(h2)
        self.dropout2 = nn.Dropout(dropout_rate)

        # Layer 3
        self.fc3 = nn.Linear(h2, h3)
        self.bn3 = nn.BatchNorm1d(h3)
        self.dropout3 = nn.Dropout(dropout_rate)

        # Output layer
        self.fc4 = nn.Linear(h3, 1)

        # Activation function shared by all hidden layers
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, 1)`` predictions."""
        hidden_layers = (
            (self.fc1, self.bn1, self.dropout1),
            (self.fc2, self.bn2, self.dropout2),
            (self.fc3, self.bn3, self.dropout3),
        )
        # Every hidden layer applies Linear -> BatchNorm -> ReLU -> Dropout
        for linear, batch_norm, dropout in hidden_layers:
            x = dropout(self.relu(batch_norm(linear(x))))

        # Output layer (no activation for regression)
        return self.fc4(x)

# Train function

In [4]:
# Define training function
def train_nn(model, train_df, num_epochs=100, batch_size=32, patience=10,
             min_delta=0.0001, learning_rate=0.001, weight_decay=1e-5):
    """
    Train ``model`` on ``train_df`` with mini-batch Adam and early stopping.

    Features are every column except ``'y'``; they are standardized with a
    ``StandardScaler`` fitted on ``train_df``. The fitted scaler is attached
    to the model as ``model.scaler`` so that later evaluation can apply the
    same training statistics instead of re-fitting on new data.

    Early stopping monitors the loss on the full training set (computed in
    eval mode, i.e. without dropout), not a held-out set. After training the
    weights from the epoch with the lowest monitored loss are restored.

    Args:
        model: PyTorch module mapping a (batch, n_features) tensor to (batch, 1).
        train_df: pandas DataFrame with feature columns and a ``'y'`` target.
        num_epochs: Maximum number of passes over the data.
        batch_size: Mini-batch size.
        patience: Epochs without improvement tolerated before stopping.
        min_delta: Minimum decrease of the loss that counts as improvement.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay (L2 regularization strength).

    Returns:
        The same ``model`` object, trained in place.
    """
    # Loss function: Mean Squared Error (MSE) for regression
    criterion = nn.MSELoss()

    # Optimizer: Adam with weight decay (L2 penalty on the weights)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Separate features and target
    X_train = train_df.drop('y', axis=1).values
    y_train = train_df['y'].values

    # Standardize features and keep the fitted scaler with the model so the
    # exact same transformation can be applied at evaluation time
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    model.scaler = scaler

    # Convert to PyTorch tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled)
    y_train_tensor = torch.FloatTensor(y_train).reshape(-1, 1)

    # Early-stopping bookkeeping
    best_train_loss = float('inf')
    epochs_without_improvement = 0
    best_model_state = None

    # Create data loader for batch training
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    # BatchNorm1d cannot compute batch statistics from a single sample in
    # train mode, so a trailing mini-batch of size one is dropped.
    n_samples = len(train_dataset)
    drop_last = n_samples > batch_size and n_samples % batch_size == 1
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              drop_last=drop_last)

    # Track the per-epoch training loss
    train_losses = []

    print("Starting training...")
    print(f"Epochs: {num_epochs}, Batch size: {batch_size}\n")

    for epoch in range(num_epochs):
        # Training phase
        model.train()

        for batch_X, batch_y in train_loader:
            # Forward pass
            predictions = model(batch_X)
            loss = criterion(predictions, batch_y)

            # Backward pass and optimization
            optimizer.zero_grad()  # Clear previous gradients
            loss.backward()        # Compute gradients
            optimizer.step()       # Update weights

        # Compute training loss WITHOUT dropout (for fair comparison)
        model.eval()  # Disable dropout / use running BatchNorm statistics
        with torch.no_grad():
            train_predictions = model(X_train_tensor)
            epoch_loss = criterion(train_predictions, y_train_tensor).item()
        train_losses.append(epoch_loss)

        # Early stopping check
        if epoch_loss < best_train_loss - min_delta:
            # Training loss improved
            best_train_loss = epoch_loss
            epochs_without_improvement = 0
            # Save the best model state (cloned so later updates don't alter it)
            best_model_state = OrderedDict(
                (k, v.clone()) for k, v in model.state_dict().items()
            )
            print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {epoch_loss:.4f} ✓ (New best)")
        else:
            # No improvement
            epochs_without_improvement += 1
            print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {epoch_loss:.4f} (No improvement: {epochs_without_improvement}/{patience})")

            # Check if we should stop
            if epochs_without_improvement >= patience:
                print(f"\nEarly stopping triggered after {epoch+1} epochs!")
                print(f"Best train loss: {best_train_loss:.4f}")
                break

    # Restore the best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print("\nRestored model to best train loss state")

    # Report the loss of the restored weights. Using the tracked best value
    # avoids indexing past the start of train_losses when no epoch ever
    # improved (e.g. the loss was NaN from the first epoch).
    if best_model_state is not None:
        final_loss = best_train_loss
    elif train_losses:
        final_loss = train_losses[-1]
    else:
        final_loss = float('nan')

    print("\nTraining completed!")
    print(f"Final Training Loss: {final_loss:.4f}")

    return model

# Compute loss function

In [5]:
def compute_loss(model, df, scaler=None):
    """
    Compute and print the MSE loss of ``model`` on ``df``.

    Features must be standardized with the statistics learned during
    training. The scaler is resolved in this order:

    1. the ``scaler`` argument, if given;
    2. a ``scaler`` attribute on ``model``, if present;
    3. as a last resort, a new ``StandardScaler`` fitted on ``df`` itself.
       This scales the data differently from training, so the reported loss
       is not a faithful test loss; a warning is printed in that case.

    Args:
        model: Trained PyTorch model.
        df: pandas DataFrame with the same format as the training data
            (feature columns plus a ``'y'`` target column), or None.
        scaler: Optional fitted scaler exposing ``transform``.

    Returns:
        The MSE as a float, or None when ``df`` is None.
    """
    # Check if df was imported
    if df is None:
        print('ERROR: Dataframe was not imported (defaulted to null)')
        return None

    # Extract features and target
    X_test = df.drop('y', axis=1).values
    y_test = df['y'].values

    # Standardize with the scaler fitted on the training data when available
    if scaler is None:
        scaler = getattr(model, 'scaler', None)
    if scaler is not None:
        X_test_scaled = scaler.transform(X_test)
    else:
        print('WARNING: no fitted scaler available; standardizing with '
              'statistics of the evaluated data instead of the training data')
        X_test_scaled = StandardScaler().fit_transform(X_test)

    # Convert to float32 tensors (the target becomes a column vector so it
    # lines up with the model's (n, 1) output)
    X_test_tensor = torch.as_tensor(X_test_scaled, dtype=torch.float32)
    y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32).reshape(-1, 1)

    # MSE loss
    criterion = nn.MSELoss()

    # Compute predictions
    model.eval()  # Set to evaluation mode
    with torch.no_grad():
        predictions = model(X_test_tensor)
        mse_loss = criterion(predictions, y_test_tensor).item()

    # Print MSE loss as required
    print(f"MSE Loss on test data: {mse_loss:.6f}")

    return mse_loss

# Init and train model

In [6]:
# Merge the training and validation splits into one larger training set
# (they were used separately in the scratch work notebook)
mega_df = pd.concat([train_df, val_df], ignore_index=True)

# Build a fresh network for the 10 input features, then fit it on the merged data
model = NeuralNetwork(input_size=10, dropout_rate=0.3)
best_model = train_nn(model, mega_df)

Starting training...
Epochs: 100, Batch size: 32

Epoch [1/100] - Train Loss: 3.9890 ✓ (New best)
Epoch [2/100] - Train Loss: 1.7067 ✓ (New best)
Epoch [3/100] - Train Loss: 0.6566 ✓ (New best)
Epoch [4/100] - Train Loss: 0.6469 ✓ (New best)
Epoch [5/100] - Train Loss: 0.3600 ✓ (New best)
Epoch [6/100] - Train Loss: 0.5488 (No improvement: 1/10)
Epoch [7/100] - Train Loss: 0.5686 (No improvement: 2/10)
Epoch [8/100] - Train Loss: 0.4217 (No improvement: 3/10)
Epoch [9/100] - Train Loss: 0.4875 (No improvement: 4/10)
Epoch [10/100] - Train Loss: 0.4258 (No improvement: 5/10)
Epoch [11/100] - Train Loss: 0.6163 (No improvement: 6/10)
Epoch [12/100] - Train Loss: 0.4746 (No improvement: 7/10)
Epoch [13/100] - Train Loss: 0.5572 (No improvement: 8/10)
Epoch [14/100] - Train Loss: 0.3213 ✓ (New best)
Epoch [15/100] - Train Loss: 0.4276 (No improvement: 1/10)
Epoch [16/100] - Train Loss: 0.3246 (No improvement: 2/10)
Epoch [17/100] - Train Loss: 0.3498 (No improvement: 3/10)
Epoch [18/100] -

# Generate loss for test df

In [7]:
# Evaluate the trained model on the optional test set. `test` is None when
# test.csv was not found, in which case compute_loss prints an error and
# test_loss ends up as None.
test_loss = compute_loss(best_model, test)

ERROR: Dataframe was not imported (defaulted to null)
