# Import Libraries

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import pickle
from sklearn.pipeline import Pipeline
from skopt import BayesSearchCV
from skopt.space import Real, Integer
import torchvision.models as models
import shap

# Model

In [3]:
directory = '../data/'

Mounted at /content/drive


In [4]:
# Load the data split from file
with open(directory + 'data_split.pkl', 'rb') as f:
    X_train, X_test, y_train, y_test = pickle.load(f)

In [5]:
class MLP(nn.Module):
    def __init__(self, input_dim):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.layers(x)

In [6]:
# Convert the DataFrame to a NumPy array
X_train_numpy = X_train.to_numpy()
X_test_numpy = X_test.to_numpy()

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_numpy, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_numpy, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [7]:
input_dim = X_train_tensor.shape[1]
model = MLP(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/100], Loss: 0.0072
Epoch [20/100], Loss: 0.0401
Epoch [30/100], Loss: 0.0062
Epoch [40/100], Loss: 0.0735
Epoch [50/100], Loss: 0.0113
Epoch [60/100], Loss: 0.0014
Epoch [70/100], Loss: 0.0075
Epoch [80/100], Loss: 0.0009
Epoch [90/100], Loss: 0.0008
Epoch [100/100], Loss: 0.0063


In [8]:
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor).numpy().ravel()  # Reshape to (977,)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Root mean squared Score: {r2}")

Mean Squared Error: 0.20686680148159395
Mean Absolute Error: 0.05768942653518359
Root mean squared Score: 0.7812022251571791


# Export

In [9]:
# Save predictions and metrics
results = {
    'y_pred': y_pred,
    'mse': mse,
    'mae': mae,
    'r2': r2
}

# Save the predictions
with open(directory + 'multilayer_perceptron_results.pkl', 'wb') as file:
    pickle.dump(results, file)