In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from utils import gather_data, mean_squared_percentage_error

# NN Model for regression
The goal of this model is to build and try a few variances of basic neural networks to see if they can outperform the other regression models.

In [2]:
data, label = gather_data()
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2)
y_train, y_test = np.reshape(y_train, (len(y_train), 1)), np.reshape(y_test, (len(y_test), 1))
kfold = KFold(n_splits=10, shuffle=True)

Index(['Log GDP per capita', 'Social support',
       'Healthy life expectancy at birth', 'Freedom to make life choices',
       'Generosity', 'Perceptions of corruption', 'Positive affect',
       'Negative affect', 'Confidence in national government'],
      dtype='object')


In [3]:
class NeuralNetwork(nn.Module):
    """
      Represents a Neural Network that can be constructed with a vartiety of hidden layers.
    """
    def __init__(self, input_size, num_hidden_layers, num_nodes_per_layer):
        """
          Initializes a neural network with the specified parameters

          Args:
            input_size: The size of the input feature vector
            num_hidden_layers: The number of hidden layers in the neural network
            num_nodes_per_layer: The number of nodes in each hidden layer
        """
        super().__init__()
        # Start with intialized first layer
        hidden_layers = [nn.Linear(input_size, num_nodes_per_layer)]
        for _ in range(num_hidden_layers):
            hidden_layers.append(nn.Linear(num_nodes_per_layer, num_nodes_per_layer))
            hidden_layers.append(nn.ReLU())
        self.layers = nn.Sequential(*hidden_layers, nn.Linear(num_nodes_per_layer, 1))

    def forward(self, x):
        """
          Feeds x forward through the neural network to get the prediction

          Args:
            x: The feature vector to get the prediction from
        """
        return self.layers(x)

In [4]:

loss_fn = nn.MSELoss()
# The following 2 functions were built with assistance from the torch documentation which has similar functions provided.
def train_loop(dataloader, model, lr=1e-5, num_epochs=200):
    """
      Runs the training loop for the model.

      Args:
        dataloader: the dataloader containing the training data
        model: the model to optimize
        lr: The learning rate for the model
        num_epochs: the number of times to repeat the trianing loop
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        for batch, (X, y) in enumerate(dataloader):
            # Compute prediction and loss
            pred = model(X)
            loss = loss_fn(pred, y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


def test_loop(dataloader, model, result):
    """
      Runs the test loop for the model and outputs the result to the given dictionary

      Args:
        dataloader: The data for the model to be validated on
        model: The model to test
        result: The result dictionary to store the data in.
    """
    num_batches = len(dataloader)
    test_loss, test_r2, test_mspe = 0, 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            test_r2 += r2_score(y.numpy(), pred.numpy())
            test_mspe += mean_squared_percentage_error(y.numpy(), pred.numpy())

    result["MSE"].append(test_loss / num_batches)
    result["R2"].append(test_r2 / num_batches)
    result["MSPE"].append(test_mspe / num_batches)


In [5]:
results = []
for num_hidden_layers in range(1, 3):
    for num_nodes_per_layer in [8, 16, 32, 64]:
        for lr in [1e-5, 1e-4, 1e-3]:
            result = {
                "num_hidden_layers": num_hidden_layers,
                "num_nodes_per_layer": num_nodes_per_layer,
                "lr": lr,
                "MSE": [],
                "R2": [],
                "MSPE": []
            }
            for train_index, val_index in kfold.split(x_train, y_train):
                train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train[train_index]), torch.Tensor(y_train[train_index])), batch_size=64, shuffle=True)
                val_dataset= DataLoader(TensorDataset(torch.Tensor(x_train[val_index]), torch.Tensor(y_train[val_index])), batch_size=64, shuffle=True)
                model = NeuralNetwork(len(x_train[0]), num_hidden_layers=num_hidden_layers, num_nodes_per_layer=num_nodes_per_layer)
                train_loop(train_dataset, model, lr=lr)
                test_loop(val_dataset, model, result)            
            result["MSE"] = np.mean(result["MSE"])
            result["R2"] = np.mean(result["R2"])
            result["MSPE"] = np.mean(result["MSPE"])
            results.append(result)

In [6]:
results

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'MSE': 21.96667699813843,
  'R2': -16.04467396375931,
  'MSPE': 67.75803418171526},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 0.0001,
  'MSE': 0.7521995529532433,
  'R2': 0.40903642597493794,
  'MSPE': 3.3932061376746256},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 0.001,
  'MSE': 0.31304701666037243,
  'R2': 0.7330454635787432,
  'MSPE': 1.527019767937335},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 1e-05,
  'MSE': 20.519692675272623,
  'R2': -15.654657721009343,
  'MSPE': 63.060800758275114},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 0.0001,
  'MSE': 0.8041506628195444,
  'R2': 0.3906509966519347,
  'MSPE': 3.5562607997821436},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 0.001,
  'MSE': 0.29382758438587187,
  'R2': 0.7509266938234045,
  'MSPE': 1.4407435515801414},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 3

In [7]:
# Sort by lowest MSE
sorted(results, key=lambda x: x["MSE"])

[{'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'MSE': 0.27139189069469777,
  'R2': 0.7565012849395938,
  'MSPE': 1.248820401784597},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 16,
  'lr': 0.001,
  'MSE': 0.29115673899650574,
  'R2': 0.7608458294660674,
  'MSPE': 1.4290420133432353},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'MSE': 0.2919377850989501,
  'R2': 0.7395965652707828,
  'MSPE': 1.3752966615926436},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 0.001,
  'MSE': 0.29382758438587187,
  'R2': 0.7509266938234045,
  'MSPE': 1.4407435515801414},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 32,
  'lr': 0.001,
  'MSE': 0.2938385918736458,
  'R2': 0.7623195062620584,
  'MSPE': 1.4319465096776534},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 0.001,
  'MSE': 0.2970942872265975,
  'R2': 0.7504019572187643,
  'MSPE': 1.477529115626833},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 

In [8]:
# Sort by highest R2
sorted(results, key=lambda x: x["R2"], reverse=True)

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 32,
  'lr': 0.001,
  'MSE': 0.2938385918736458,
  'R2': 0.7623195062620584,
  'MSPE': 1.4319465096776534},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 16,
  'lr': 0.001,
  'MSE': 0.29115673899650574,
  'R2': 0.7608458294660674,
  'MSPE': 1.4290420133432353},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'MSE': 0.27139189069469777,
  'R2': 0.7565012849395938,
  'MSPE': 1.248820401784597},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 0.001,
  'MSE': 0.29382758438587187,
  'R2': 0.7509266938234045,
  'MSPE': 1.4407435515801414},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 0.001,
  'MSE': 0.2970942872265975,
  'R2': 0.7504019572187643,
  'MSPE': 1.477529115626833},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 8,
  'lr': 0.001,
  'MSE': 0.30758808478713034,
  'R2': 0.7492705909441482,
  'MSPE': 1.4502070425548639},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 

As we can see the best parameters is 1 hidden layer with 32 nodes and a learning rate of 1e-3.

In [9]:
train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=64, shuffle=True)
test_dataset = DataLoader(TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=64, shuffle=True)
best_model = NeuralNetwork(len(x_train[0]), 1, 32)
train_loop(train_dataset, best_model, lr=1e-3)
result = {
    "MSE": [],
    "R2": [],
    "MSPE": []
}
test_loop(test_dataset, best_model, result)
print(f"MSE: {result['MSE']}")
print(f"R2: {result['R2']}")
print(f"MSPE: {result['MSPE']}")

MSE: [0.2820224662621816]
R2: [0.786443092445488]
MSPE: [1.5082617176489697]
