In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from utils import gather_data, mean_squared_percentage_error

# NN Model for regression
The goal of this model is to build and try a few variances of basic neural networks to see if they can outperform the other regression models.

In [2]:
data, label = gather_data()
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2)
y_train, y_test = np.reshape(y_train, (len(y_train), 1)), np.reshape(y_test, (len(y_test), 1))
kfold = KFold(n_splits=10, shuffle=True)

Index(['Log GDP per capita', 'Social support',
       'Healthy life expectancy at birth', 'Freedom to make life choices',
       'Generosity', 'Perceptions of corruption', 'Positive affect',
       'Negative affect', 'Confidence in national government'],
      dtype='object')


In [3]:
class NeuralNetwork(nn.Module):
    """
      Represents a Neural Network that can be constructed with a vartiety of hidden layers.
    """
    def __init__(self, input_size, num_hidden_layers, num_nodes_per_layer):
        """
          Initializes a neural network with the specified parameters

          Args:
            input_size: The size of the input feature vector
            num_hidden_layers: The number of hidden layers in the neural network
            num_nodes_per_layer: The number of nodes in each hidden layer
        """
        super().__init__()
        # Start with intialized first layer
        hidden_layers = [nn.Linear(input_size, num_nodes_per_layer)]
        for _ in range(num_hidden_layers):
            hidden_layers.append(nn.Linear(num_nodes_per_layer, num_nodes_per_layer))
            hidden_layers.append(nn.ReLU())
        self.layers = nn.Sequential(*hidden_layers, nn.Linear(num_nodes_per_layer, 1))

    def forward(self, x):
        """
          Feeds x forward through the neural network to get the prediction

          Args:
            x: The feature vector to get the prediction from
        """
        return self.layers(x)

In [4]:

loss_fn = nn.MSELoss()
# The following 2 functions were built with assistance from the torch documentation which has similar functions provided.
def train_loop(dataloader, model, lr=1e-5, num_epochs=200, momentum=0.9, weight_decay: float=None):
    """
      Runs the training loop for the model.

      Args:
        dataloader: the dataloader containing the training data
        model: the model to optimize
        lr: The learning rate for the model
        num_epochs: the number of times to repeat the trianing loop
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    for epoch in range(num_epochs):
        for batch, (X, y) in enumerate(dataloader):
            # Compute prediction and loss
            pred = model(X)
            loss = loss_fn(pred, y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


def test_loop(dataloader, model, result):
    """
      Runs the test loop for the model and outputs the result to the given dictionary

      Args:
        dataloader: The data for the model to be validated on
        model: The model to test
        result: The result dictionary to store the data in.
    """
    num_batches = len(dataloader)
    test_loss, test_r2, test_mspe = 0, 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            test_r2 += r2_score(y.numpy(), pred.numpy())
            test_mspe += mean_squared_percentage_error(y.numpy(), pred.numpy())

    result["MSE"].append(test_loss / num_batches)
    result["R2"].append(test_r2 / num_batches)
    result["MSPE"].append(test_mspe / num_batches)


In [5]:
results = []
for num_hidden_layers in range(1, 3):
    for num_nodes_per_layer in [8, 16, 32, 64]:
        for lr in [1e-5, 1e-4, 1e-3]:
            for momentum in [.9, .99]:
                for weight_decay in [1e-5, 1e-4, 1e-3, .01, .1, .5]:
                    result = {
                        "num_hidden_layers": num_hidden_layers,
                        "num_nodes_per_layer": num_nodes_per_layer,
                        "lr": lr,
                        "momentum": momentum,
                        "weight_decay": weight_decay,
                        "MSE": [],
                        "R2": [],
                        "MSPE": []
                    }
                    for train_index, val_index in kfold.split(x_train, y_train):
                        train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train[train_index]), torch.Tensor(y_train[train_index])), batch_size=64, shuffle=True)
                        val_dataset= DataLoader(TensorDataset(torch.Tensor(x_train[val_index]), torch.Tensor(y_train[val_index])), batch_size=64, shuffle=True)
                        model = NeuralNetwork(len(x_train[0]), num_hidden_layers=num_hidden_layers, num_nodes_per_layer=num_nodes_per_layer)
                        train_loop(train_dataset, model, lr=lr, momentum=momentum, weight_decay=weight_decay)
                        test_loop(val_dataset, model, result)            
                    result["MSE"] = np.mean(result["MSE"])
                    result["R2"] = np.mean(result["R2"])
                    result["MSPE"] = np.mean(result["MSPE"])
                    results.append(result)

In [6]:
results

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 1e-05,
  'MSE': 0.799657600124677,
  'R2': 0.35195948065992944,
  'MSPE': 3.717631917785515},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.0001,
  'MSE': 0.731444752216339,
  'R2': 0.36962275136745115,
  'MSPE': 3.2832924334915594},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.001,
  'MSE': 0.7985026687383653,
  'R2': 0.3657095609433454,
  'MSPE': 3.7992639589415056},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.01,
  'MSE': 0.834874893228213,
  'R2': 0.33734324741039157,
  'MSPE': 4.061007521143464},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.1,
  'MSE': 0.7979643791913986,
  'R2': 0.36092843165321037,
  'MSPE': 3.544346478701842},

In [7]:
# Sort by lowest MSE
sorted(results, key=lambda x: x["MSE"])

[{'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.9,
  'weight_decay': 1e-05,
  'MSE': 0.2356918546060721,
  'R2': 0.8087246579994825,
  'MSPE': 1.1024457791989501},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.99,
  'weight_decay': 1e-05,
  'MSE': 0.23920444659888748,
  'R2': 0.7917996216679747,
  'MSPE': 1.1408100749431838},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 8,
  'lr': 0.001,
  'momentum': 0.9,
  'weight_decay': 0.0001,
  'MSE': 0.2404098773996035,
  'R2': 0.8012343042894294,
  'MSPE': 1.134895604207284},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.001,
  'MSE': 0.24179739902416864,
  'R2': 0.7988074525832575,
  'MSPE': 1.1475948895318342},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.0001,
  'MSE': 0.24271518141031265,
  'R2': 0.799364937474935,
  'MSPE': 1.21

In [8]:
# Sort by highest R2
sorted(results, key=lambda x: x["R2"], reverse=True)

[{'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.9,
  'weight_decay': 1e-05,
  'MSE': 0.2356918546060721,
  'R2': 0.8087246579994825,
  'MSPE': 1.1024457791989501},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 16,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.001,
  'MSE': 0.24524316365520163,
  'R2': 0.8055668565921076,
  'MSPE': 1.235798517655995},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.0001,
  'MSE': 0.25389389966924986,
  'R2': 0.8053059077915761,
  'MSPE': 1.3613589903377372},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 16,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.01,
  'MSE': 0.24934231961766878,
  'R2': 0.8038193457407863,
  'MSPE': 1.2034694337747012},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 32,
  'lr': 0.001,
  'momentum': 0.99,
  'weight_decay': 1e-05,
  'MSE': 0.24595479071140286,
  'R2': 0.8023366878708164,
  'MSPE': 1

As we can see the best parameters is 2 hidden layers with 64 nodes and a learning rate of 1e-3, a momentum of 0.9, and a weight decay of 1e-5.

In [10]:
train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=64, shuffle=True)
test_dataset = DataLoader(TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=64, shuffle=True)
best_model = NeuralNetwork(len(x_train[0]), 2, 64)
train_loop(train_dataset, best_model, lr=1e-3, momentum=0.9, weight_decay=1e-5)
result = {
    "MSE": [],
    "R2": [],
    "MSPE": []
}
test_loop(test_dataset, best_model, result)
print(f"MSE: {result['MSE']}")
print(f"R2: {result['R2']}")
print(f"MSPE: {result['MSPE']}")

MSE: [0.28993074347575504]
R2: [0.7654101324580855]
MSPE: [1.340149575844407]
