# M05W03 - Multilayer Perceptrons (MLPs) and Activation Functions 

## Import Packages

In [45]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader


## Utils

In [46]:
# Random state set up
random_state = 59
np.random.seed(random_state)
if torch.cuda.is_available():
    torch.cuda.manual_seed(random_state)
else:
    torch.manual_seed(random_state)
    
# Set up Pytorch computational device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Project 1: Auto MPG

### Data Preparation

In [47]:
# Configuration
val_size = 0.2
test_size = 0.125
is_shuffle = True
batch_size = 32

In [48]:
# Implement CustomDataset class to store input data
class CustomDataset(Dataset):
    def __init__(self, x, y):
        self.X = x
        self.y = y
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [49]:
# Load Data
dataset_dir = 'data'
dataset_filename = 'Auto_MPG_data.csv'
dataset_path = os.path.join('..', dataset_dir, dataset_filename)
dataset = pd.read_csv(dataset_path)

# Data Preprocessing
X = dataset.drop(columns='MPG').to_numpy()
y = dataset['MPG'].to_numpy()

# Train - val - test split
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=val_size,
    random_state=random_state,
    shuffle=is_shuffle
)

X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train,
    test_size=test_size,
    random_state=random_state,
    shuffle=is_shuffle
)

# Standardization
normalizer = StandardScaler()
X_train = normalizer.fit_transform(X_train)
X_val = normalizer.transform(X_val)
X_test = normalizer.transform(X_test)

# Convert to Pytorch tensor
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Initialize CustomDataset obj
train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)
test_dataset = CustomDataset(X_test, y_test)

# Initialize DataLoader
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)
val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=0)
test_loader = DataLoader(test_dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=0)

### Model Construction

In [50]:
# Implement MLP structure
class MLP (nn.Module):
    def __init__ (self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.linear1 = nn.Linear(input_dims, hidden_dims)
        self.linear2 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = F.relu(x)
        out = self.output(x)
        return out.squeeze(1)

In [51]:
# Configuration
input_dims = X_train.shape[1]
output_dims = 1
hidden_dims = 64


# Model intialization
model = MLP(input_dims=input_dims,
            hidden_dims=hidden_dims,
            output_dims=output_dims).to(device)

# Loss Function Selection
criterion = nn.MSELoss()

# Optimizer Selection
momentum = 0.0
weight_decay = 0.0
lr = 1e-2
optimizer = torch.optim.SGD(model.parameters(), 
                            momentum=momentum,
                            weight_decay=weight_decay,
                            lr=lr)

# Metric Selection
def r_squared(y_true, y_pred):
    y_true = torch.Tensor(y_true).to(device)
    y_pred = torch.Tensor(y_pred).to(device)
    mean_true = torch.mean(y_true)
    ss_tot = torch.sum((y_true - mean_true) ** 2)
    ss_res = torch.sum((y_true - y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2

### Train Model

In [52]:
# Configuration
epochs = 100
train_losses = []
val_losses = []
train_r2 = []
val_r2 = []

# Training
for epoch in range(epochs):
    train_loss = 0.0
    train_target = []
    val_target = []
    train_predict = []
    val_predict = []
    model.train()
    for X_samples, y_sample in train_loader:
        X_samples = X_samples.to(device)
        y_samples = y_sample.to(device)
        optimizer.zero_grad()
        outputs = model(X_samples)
        train_predict += outputs.tolist()
        train_target += outputs.tolist()
        loss = criterion(outputs, y_samples)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    train_r2.append(r_squared(train_target, train_predict))
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_samples, y_samples in val_loader:
            X_samples = X_samples.to(device)
            y_samples = y_samples.to(device)
            outputs = model(X_samples)
            val_predict += outputs.tolist()
            val_target += y_samples.tolist()
            loss = criterion(outputs, y_samples)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    val_losses.append(val_loss)
    val_r2.append(r_squared(val_target, val_predict))
    print(f"\nEPOCH {epoch + 1}:\tTraining Loss: {train_loss:.3f}\tValidation loss: {val_loss:.3f}")       


EPOCH 1:	Training Loss: 264.166	Validation loss: 556.237

EPOCH 2:	Training Loss: 279.203	Validation loss: 184.840

EPOCH 3:	Training Loss: 112.318	Validation loss: 215.240

EPOCH 4:	Training Loss: 52.179	Validation loss: 24.586

EPOCH 5:	Training Loss: 39.055	Validation loss: 6.259

EPOCH 6:	Training Loss: 24.509	Validation loss: 12.618

EPOCH 7:	Training Loss: 22.156	Validation loss: 131.026

EPOCH 8:	Training Loss: 95.069	Validation loss: 19.908

EPOCH 9:	Training Loss: 18.043	Validation loss: 17.442

EPOCH 10:	Training Loss: 15.602	Validation loss: 6.905

EPOCH 11:	Training Loss: 10.203	Validation loss: 9.593

EPOCH 12:	Training Loss: 13.507	Validation loss: 11.153

EPOCH 13:	Training Loss: 14.607	Validation loss: 10.934

EPOCH 14:	Training Loss: 9.423	Validation loss: 4.750

EPOCH 15:	Training Loss: 21.439	Validation loss: 7.417

EPOCH 16:	Training Loss: 9.123	Validation loss: 6.131

EPOCH 17:	Training Loss: 13.770	Validation loss: 5.305

EPOCH 18:	Training Loss: 6.803	Validation

### Evaluation

In [53]:
model.eval()
with torch.no_grad():
    y_hat = model(X_test.to(device))
    test_set_r2 = r_squared(y_hat, y_test)
    print("Evaluation on test set: ")
    print(f"R2: {test_set_r2}")

Evaluation on test set: 
R2: 0.8961698412895203
