In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from utils import gather_data, mean_squared_percentage_error

# NN Model for regression
The goal of this model is to build and try a few variances of basic neural networks to see if they can outperform the other regression models.

In [2]:
data, label = gather_data()
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2)
y_train, y_test = np.reshape(y_train, (len(y_train), 1)), np.reshape(y_test, (len(y_test), 1))
kfold = KFold(n_splits=10, shuffle=True)

Index(['Log GDP per capita', 'Social support',
       'Healthy life expectancy at birth', 'Freedom to make life choices',
       'Generosity', 'Perceptions of corruption', 'Positive affect',
       'Negative affect', 'Confidence in national government'],
      dtype='object')


In [3]:
class NeuralNetwork(nn.Module):
    """
      Represents a Neural Network that can be constructed with a vartiety of hidden layers.
    """
    def __init__(self, input_size, num_hidden_layers, num_nodes_per_layer):
        """
          Initializes a neural network with the specified parameters

          Args:
            input_size: The size of the input feature vector
            num_hidden_layers: The number of hidden layers in the neural network
            num_nodes_per_layer: The number of nodes in each hidden layer
        """
        super().__init__()
        # Start with intialized first layer
        hidden_layers = [nn.Linear(input_size, num_nodes_per_layer)]
        for _ in range(num_hidden_layers):
            hidden_layers.append(nn.Linear(num_nodes_per_layer, num_nodes_per_layer))
            hidden_layers.append(nn.ReLU())
        self.layers = nn.Sequential(*hidden_layers, nn.Linear(num_nodes_per_layer, 1))

    def forward(self, x):
        """
          Feeds x forward through the neural network to get the prediction

          Args:
            x: The feature vector to get the prediction from
        """
        return self.layers(x)

In [7]:

loss_fn = nn.MSELoss()
# The following 2 functions were built with assistance from the torch documentation which has similar functions provided.
def train_loop(dataloader, model, lr=1e-5, num_epochs=400):
    """
      Runs the training loop for the model.

      Args:
        dataloader: the dataloader containing the training data
        model: the model to optimize
        lr: The learning rate for the model
        num_epochs: the number of times to repeat the trianing loop
    """
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        for batch, (X, y) in enumerate(dataloader):
            # Compute prediction and loss
            pred = model(X)
            loss = loss_fn(pred, y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


def test_loop(dataloader, model, result):
    """
      Runs the test loop for the model and outputs the result to the given dictionary

      Args:
        dataloader: The data for the model to be validated on
        model: The model to test
        result: The result dictionary to store the data in.
    """
    num_batches = len(dataloader)
    test_loss, test_r2, test_mspe = 0, 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            test_r2 += r2_score(y.numpy(), pred.numpy())
            test_mspe += mean_squared_percentage_error(y.numpy(), pred.numpy())

    result["MSE"].append(test_loss / num_batches)
    result["R2"].append(test_r2 / num_batches)
    result["MSPE"].append(test_mspe / num_batches)


In [8]:
results = []
for num_hidden_layers in range(1, 3):
    for num_nodes_per_layer in [8, 16, 32, 64]:
        for lr in [1e-6, 1e-5, 1e-4]:
            result = {
                "num_hidden_layers": num_hidden_layers,
                "num_nodes_per_layer": num_nodes_per_layer,
                "lr": lr,
                "MSE": [],
                "R2": [],
                "MSPE": []
            }
            for train_index, val_index in kfold.split(x_train, y_train):
                train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train[train_index]), torch.Tensor(y_train[train_index])), batch_size=64, shuffle=True)
                val_dataset= DataLoader(TensorDataset(torch.Tensor(x_train[val_index]), torch.Tensor(y_train[val_index])), batch_size=64, shuffle=True)
                model = NeuralNetwork(len(x_train[0]), num_hidden_layers=num_hidden_layers, num_nodes_per_layer=num_nodes_per_layer)
                train_loop(train_dataset, model, lr=lr)
                test_loop(val_dataset, model, result)            
            result["MSE"] = np.mean(result["MSE"])
            result["R2"] = np.mean(result["R2"])
            result["MSPE"] = np.mean(result["MSPE"])
            results.append(result)

In [9]:
results

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-06,
  'MSE': 2.5325150122245157,
  'R2': -1.3473954078858146,
  'MSPE': 8.771734589315724},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'MSE': 2.6160859286785128,
  'R2': -1.6202175498901972,
  'MSPE': 9.265676214162145},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 0.0001,
  'MSE': 1.0526214043299356,
  'R2': 0.16687507802998316,
  'MSPE': 3.7145481952889403},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 1e-06,
  'MSE': 0.646595432360967,
  'R2': 0.46208151006604953,
  'MSPE': 3.0083383420350573},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 1e-05,
  'MSE': 0.6521557996670405,
  'R2': 0.44428559053394395,
  'MSPE': 3.049813525826491},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 0.0001,
  'MSE': 0.6213269114494324,
  'R2': 0.5033215351657102,
  'MSPE': 3.047536370542013},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 32

In [10]:
# Sort by lowest MSE
sorted(results, key=lambda x: x["MSE"])

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'MSE': 0.5653095593055089,
  'R2': 0.5455759687350545,
  'MSPE': 2.7834487016867806},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'MSE': 0.6041739712158838,
  'R2': 0.4456790825067136,
  'MSPE': 2.9791163639024347},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 32,
  'lr': 0.0001,
  'MSE': 0.6056168759862582,
  'R2': 0.4869735677193119,
  'MSPE': 2.9903817202218557},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 0.0001,
  'MSE': 0.6097867891192437,
  'R2': 0.4923086709402204,
  'MSPE': 2.8784052124529174},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 1e-06,
  'MSE': 0.6105712632338206,
  'R2': 0.47552786360746335,
  'MSPE': 2.9187475539969676},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 1e-05,
  'MSE': 0.6176611234744389,
  'R2': 0.5129029427030762,
  'MSPE': 2.9321971349418163},
 {'num_hidden_layers': 1,
  'num_nodes_per_laye

In [11]:
# Sort by highest R2
sorted(results, key=lambda x: x["R2"], reverse=True)

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'MSE': 0.5653095593055089,
  'R2': 0.5455759687350545,
  'MSPE': 2.7834487016867806},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 1e-05,
  'MSE': 0.6176611234744389,
  'R2': 0.5129029427030762,
  'MSPE': 2.9321971349418163},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 16,
  'lr': 0.0001,
  'MSE': 0.6213269114494324,
  'R2': 0.5033215351657102,
  'MSPE': 3.047536370542013},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 1e-05,
  'MSE': 0.6250403225421904,
  'R2': 0.498794890863301,
  'MSPE': 2.9406386617608744},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 0.0001,
  'MSE': 0.6097867891192437,
  'R2': 0.4923086709402204,
  'MSPE': 2.8784052124529174},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 32,
  'lr': 1e-06,
  'MSE': 0.654764606555303,
  'R2': 0.48754521592526406,
  'MSPE': 3.1543210939024435},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 

As we can see the best parameters is 1 hidden layer with 64 nodes and a learning rate of 1e-4.

In [13]:
train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=64, shuffle=True)
test_dataset = DataLoader(TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=64, shuffle=True)
best_model = NeuralNetwork(len(x_train[0]), 1, 64)
train_loop(train_dataset, best_model, lr=1e-4)
result = {
    "MSE": [],
    "R2": [],
    "MSPE": []
}
test_loop(test_dataset, best_model, result)
print(f"MSE: {result['MSE']}")
print(f"R2: {result['R2']}")
print(f"MSPE: {result['MSPE']}")

MSE: [0.5675925860802332]
R2: [0.5892713514569737]
MSPE: [2.2219670770896807]
