# Multi-layer perceptron (Deep Learning) with PyTorch

In [None]:
!pip install -r requirements.txt --quiet

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from deep_learn.metrics import compute_metrics
import pandas as pd
import torch

## Load in data

In [None]:
# Read the synthetic income dataset into a DataFrame. index_col=None is the
# pandas default: no CSV column is promoted to the index.
mlp_data = pd.read_csv(filepath_or_buffer='data/synthetic_income_data.csv', index_col=None)

In [None]:
# Separate the regression target from the predictors.
y = mlp_data['income']                        # target column
X = mlp_data.drop('income', axis='columns')   # every other column is a feature

## Prepare train, validation, and test set splits

In [None]:
from sklearn.model_selection import train_test_split
# One fixed seed for both splits so the partitioning is repeatable.
RANDOM_SEED = 42

# Stage 1: hold out 10% of all rows as the test set.
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y,
    random_state=RANDOM_SEED,
    test_size=0.1,
)

# Stage 2: take 1/9 of the remaining 90% as validation. That is ~10% of the
# full dataset, giving roughly an 80/10/10 train/val/test partition.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    random_state=RANDOM_SEED,
    test_size=1/9,
)


## Scale the data

In [None]:
# Estimate the scaling statistics from the training split only, then apply
# that same fitted transform to every split so that validation/test
# statistics never influence the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

## Select the compute device (CUDA GPU if available, otherwise CPU)

In [None]:
from torch import nn

In [None]:
import torch
# Prefer the GPU when PyTorch can see a CUDA device; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"PyTorch currently working on: {device}.")

## Convert data to Tensors

In [None]:
from torch.utils.data import DataLoader, TensorDataset

def to_tensor(data, reshape=False):
    """Copy ``data`` into a new float32 tensor.

    Parameters:
    - data: array-like accepted by ``torch.tensor`` (e.g. list, NumPy array)
    - reshape: bool, when True the result is viewed as a single column
      of shape (-1, 1); when False the original shape is kept

    Returns:
    - torch.Tensor with dtype ``torch.float32``
    """
    converted = torch.tensor(data, dtype=torch.float32)
    if reshape:
        # Column layout, e.g. for 1-D targets that must align with (N, 1) outputs.
        return converted.view(-1, 1)
    return converted


In [None]:
# Features: the scaled arrays become float32 tensors with their shape unchanged.
X_train_torch, X_val_torch, X_test_torch = (
    to_tensor(features)
    for features in (X_train_scaled, X_val_scaled, X_test_scaled)
)

# Targets: each pandas Series is converted to NumPy and reshaped into a
# single column, matching the model's one-unit output.
y_train_torch, y_val_torch, y_test_torch = (
    to_tensor(target.to_numpy(), reshape=True)
    for target in (y_train, y_val, y_test)
)

## Create a Tensor dataset

In [None]:
# Pair each feature tensor with its target tensor so that indexing a dataset
# yields a (features, target) tuple.
train_dataset, val_dataset = (
    TensorDataset(X_train_torch, y_train_torch),
    TensorDataset(X_val_torch, y_val_torch),
)

## Create a `DataLoader`

In [None]:
# Mini-batches of 32 samples. Training data is reshuffled every epoch;
# validation data keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

## Build MLP Class

In [None]:
import torch.nn as nn

class MLP(nn.Module):
    """Fully connected feed-forward network with ReLU hidden activations.

    The network is a stack of ``Linear -> ReLU`` pairs, one pair per entry in
    ``hidden_layers``, followed by a final ``Linear`` projection that has no
    activation applied to it.
    """

    def __init__(self, input_size, hidden_layers, output_size=1):
        """
        Parameters:
        - input_size: int, number of input features
        - hidden_layers: list of int, number of neurons in each hidden layer
        - output_size: int, number of output features (default: 1)
        """
        super().__init__()

        # Consecutive entries of `widths` are the (in, out) sizes of each
        # hidden Linear layer.
        widths = [input_size, *hidden_layers]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend((nn.Linear(fan_in, fan_out), nn.ReLU()))

        # Output projection from the last hidden width (or directly from the
        # input when there are no hidden layers).
        stack.append(nn.Linear(widths[-1], output_size))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the sequential stack and return its output."""
        return self.net(x)


## Put model on device

In [None]:
# Seed PyTorch's global RNG before any weights are created. RANDOM_SEED is
# already used for the data splits; without this call the initial weights
# (and anything else that draws from the default generator afterwards, such
# as DataLoader shuffling) differ from run to run.
torch.manual_seed(RANDOM_SEED)

# Two hidden layers (64 and 32 units) and a single regression output.
# The input width is the number of feature columns in X.
model = MLP(input_size=X.shape[1],
            hidden_layers=[64, 32],
            output_size=1)
# Move the parameters to the selected device (GPU if available, else CPU).
model = model.to(device)

## Create Optimizer for gradient descent

In [None]:
from torch.optim import Adam
# Step size used by Adam for every parameter update.
LEARN_RATE = 1e-3
optimizer = Adam(params=model.parameters(), lr=LEARN_RATE)

## Create Loss Function to Optimize

In [None]:
import torch
import torch.nn as nn

class MSELoss(nn.Module):
    """Mean-squared-error criterion that delegates to ``torch.nn.MSELoss``."""

    def __init__(self):
        super().__init__()
        # Built-in criterion constructed with its default settings.
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return the MSE between predictions ``yhat`` and targets ``y``."""
        squared_error = self.mse(yhat, y)
        return squared_error


In [None]:
# Loss criterion shared by the training loop, validation pass and test evaluation.
loss_fn = MSELoss()

## Build Training Loop

In [None]:
from tqdm import tqdm
import torch

import os

EPOCHS = 100
patience = 2  # consecutive epochs without validation improvement before stopping
best_val_loss = float('inf')
epochs_no_improve = 0

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first save attempt.
os.makedirs('models', exist_ok=True)

# Per-epoch average losses, kept for plotting after training.
train_losses = []
val_losses = []

for epoch in range(EPOCHS):
    # ---- Training phase ----
    model.train()
    total_train_loss = 0.0
    train_samples = 0
    train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Training]", leave=False)

    for features_batch, targets_batch in train_loop:
        features_batch = features_batch.to(device)
        targets_batch = targets_batch.to(device)

        predictions = model(features_batch)
        loss = loss_fn(predictions, targets_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a per-batch mean, so weight it by the batch
        # size; otherwise a short final batch counts as much as a full one.
        batch_loss = loss.item()
        batch_size = features_batch.size(0)
        total_train_loss += batch_loss * batch_size
        train_samples += batch_size
        train_loop.set_postfix(train_loss=batch_loss)

    avg_train_loss = total_train_loss / train_samples
    train_losses.append(avg_train_loss)

    # ---- Validation phase (no gradient tracking, eval-mode layers) ----
    model.eval()
    total_val_loss = 0.0
    val_samples = 0
    with torch.no_grad():
        for val_features, val_targets in val_loader:
            val_features = val_features.to(device)
            val_targets = val_targets.to(device)

            val_predictions = model(val_features)
            val_loss = loss_fn(val_predictions, val_targets)

            val_batch_size = val_features.size(0)
            total_val_loss += val_loss.item() * val_batch_size
            val_samples += val_batch_size

    avg_val_loss = total_val_loss / val_samples
    val_losses.append(avg_val_loss)

    print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    # Early stopping based on validation loss: checkpoint every new best,
    # otherwise count the epoch towards the patience budget.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), 'models/best_model.pth')
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch+1} (no improvement in val loss for {patience} epochs)")
        break


In [None]:
import matplotlib.pyplot as plt
from typing import List, Optional, Tuple, Dict

def plot_training_history(train_losses: Optional[List[float]] = None,
                          valid_losses: Optional[List[float]] = None,
                          log_y_axis: bool = True,
                          figsize: Tuple[int, int] = (10, 6),
                          train_label: str = 'Train Loss',
                          valid_label: str = 'Validation Loss',
                          plot_x_label: str = 'Epoch',
                          plot_y_label: str = 'Loss',
                          plot_title: str = 'Training and Validation Loss Over Epochs',
                          show_grid: bool = True, 
                          train_args: Optional[Dict] = None,
                          valid_args: Optional[Dict] = None) -> None:
    """
    Plot training and validation loss over epochs.

    Each series is drawn against a 1-based epoch number, so the first value
    in a list appears at x = 1.

    Parameters:
    - train_losses: List of training loss values
    - valid_losses: List of validation loss values
    - log_y_axis: Whether to use log scale for the y-axis
    - figsize: Tuple for the plot size (width, height)
    - train_label: Label for the training loss line
    - valid_label: Label for the validation loss line
    - plot_x_label: Text for the x-axis label
    - plot_y_label: Text for the y-axis label
    - plot_title: Title shown above the plot
    - show_grid: Whether to draw grid lines
    - train_args: Optional dict of plot kwargs for training loss line
    - valid_args: Optional dict of plot kwargs for validation loss line

    Raises:
    - ValueError: if both train_losses and valid_losses are None
    """
    if train_losses is None and valid_losses is None:
        raise ValueError("At least one of train_losses or valid_losses must be provided.")

    train_args = train_args or {}
    valid_args = valid_args or {}

    plt.figure(figsize=figsize)

    # Explicit x-values starting at 1: plotting the list alone would place
    # the first epoch at x = 0, one less than the epoch number it represents.
    if train_losses is not None:
        plt.plot(range(1, len(train_losses) + 1), train_losses,
                 label=train_label, **train_args)
    if valid_losses is not None:
        plt.plot(range(1, len(valid_losses) + 1), valid_losses,
                 label=valid_label, **valid_args)

    plt.xlabel(plot_x_label)
    plt.ylabel(plot_y_label)
    plt.title(plot_title)
    plt.legend()
    if log_y_axis:
        plt.yscale('log')
    plt.grid(show_grid)
    plt.tight_layout()
    plt.show()

In [None]:
# Solid black line for training loss, dashed grey line for validation loss,
# both on a log-scaled y-axis.
plot_training_history(
    train_losses=train_losses,
    valid_losses=val_losses,
    log_y_axis=True,
    figsize=(10, 6),
    train_args={'color': 'black', 'linestyle': '-', 'linewidth': 2},
    valid_args={'color': 'grey', 'linestyle': '--', 'linewidth': 2},
)

## Load the best saved model checkpoint

In [None]:
# Rebuild the network with the same architecture that produced the
# checkpoint; load_state_dict needs matching parameter names and shapes.
model = MLP(input_size=X.shape[1], 
            hidden_layers=[64, 32], 
            output_size=1) 
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written during a CUDA run can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load('models/best_model.pth', map_location=device))
model.to(device)

## Evaluation of model

In [None]:
# Score the reloaded model on the held-out test split in a single pass.
model.eval()
with torch.no_grad():
    preds = model(X_test_torch.to(device))
    # The criterion yields the MSE; its square root is the RMSE, in the
    # same units as the target.
    rmse = loss_fn(preds, y_test_torch.to(device)).sqrt()

print("PyTorch MLP RMSE:", rmse.item())


## Using Regression Diagnostic Plot

In [None]:
from modelviz.regression import regression_diagnostics_panel
import numpy

In [None]:
# Pass targets and predictions to the diagnostics helper as flat NumPy arrays
# (moved to the CPU first, since NumPy cannot read GPU tensors).
regression_diagnostics_panel(
    y_pred=preds.detach().cpu().numpy().flatten(),
    y_test=y_test_torch.cpu().numpy().flatten(),
    qq_point_color='black',
    qq_line_color='grey',
)