In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from utils import gather_data, mean_squared_percentage_error

# NN Model for regression
The goal of this model is to build and try a few variances of basic neural networks to see if they can outperform the other regression models.

In [2]:
x_train, x_test, y_train, y_test = gather_data()
y_train, y_test = np.reshape(y_train, (-1, 1)), np.reshape(y_test, (-1, 1))
kfold = KFold(n_splits=10, shuffle=True)

Index(['Log GDP per capita', 'Social support',
       'Healthy life expectancy at birth', 'Freedom to make life choices',
       'Generosity', 'Perceptions of corruption', 'Positive affect',
       'Negative affect', 'Confidence in national government'],
      dtype='object')


In [3]:
class NeuralNetwork(nn.Module):
    """
      Represents a Neural Network that can be constructed with a vartiety of hidden layers.
    """
    def __init__(self, input_size, num_hidden_layers, num_nodes_per_layer):
        """
          Initializes a neural network with the specified parameters

          Args:
            input_size: The size of the input feature vector
            num_hidden_layers: The number of hidden layers in the neural network
            num_nodes_per_layer: The number of nodes in each hidden layer
        """
        super().__init__()
        # Start with intialized first layer
        hidden_layers = [nn.Linear(input_size, num_nodes_per_layer)]
        for _ in range(num_hidden_layers):
            hidden_layers.append(nn.Linear(num_nodes_per_layer, num_nodes_per_layer))
            hidden_layers.append(nn.ReLU())
        self.layers = nn.Sequential(*hidden_layers, nn.Linear(num_nodes_per_layer, 1))

    def forward(self, x):
        """
          Feeds x forward through the neural network to get the prediction

          Args:
            x: The feature vector to get the prediction from
        """
        return self.layers(x)

In [4]:

# The following 2 functions were built with assistance from the torch documentation which has similar functions provided.
def train_loop(dataloader, model, lr=1e-5, num_epochs=200, momentum=0.9, weight_decay: float=None, loss_fn=nn.MSELoss(), early_stopping: int=None):
    """
      Runs the training loop for the model.

      Args:
        dataloader: the dataloader containing the training data
        model: the model to optimize
        lr: The learning rate for the model
        num_epochs: the number of times to repeat the training loop
        momentum: The momentum for SGD
        weight_decay: The amount of weight decay for SGD
        loss_fn: the loss function to optimize
        early_stopping: how many rounds of having a loss that doesn't improve before stopping
    """
    model_dict = {}
    best_model = {}
    best_loss = np.inf
    kept_for = 1
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    for epoch in range(num_epochs):
        total_loss = 0
        for batch, (X, y) in enumerate(dataloader):
            # Compute prediction and loss
            pred = model(X)
            loss = loss_fn(pred, y)
            total_loss += loss.item()

            # Store model dict for early stopping
            if early_stopping:
                model_dict = model.state_dict()

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        total_loss /= len(dataloader)

        if early_stopping:
            # Determine if we should stop early
            if best_loss < total_loss:
                kept_for += 1
                if kept_for >= early_stopping:
                    model.load_state_dict(best_model)
                    return 
            else:
                best_model = model_dict
                best_loss = total_loss
                kept_for = 1


def test_loop(dataloader, model, result):
    """
      Runs the test loop for the model and outputs the result to the given dictionary

      Args:
        dataloader: The data for the model to be validated on
        model: The model to test
        result: The result dictionary to store the data in.
    """
    num_batches = len(dataloader)
    test_loss, test_r2, test_mspe = 0, 0, 0
    loss_fn = nn.MSELoss()

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            test_r2 += r2_score(y.numpy(), pred.numpy())
            test_mspe += mean_squared_percentage_error(y.numpy(), pred.numpy())

    result["MSE"].append(test_loss / num_batches)
    result["R2"].append(test_r2 / num_batches)
    result["MSPE"].append(test_mspe / num_batches)


In [5]:
results = []
for num_hidden_layers in range(1, 3):
    for num_nodes_per_layer in [8, 16, 32, 64]:
        for lr in [1e-5, 1e-4, 1e-3]:
            for momentum in [.9, .99]:
                for weight_decay in [1e-5, 1e-4, 1e-3, .01, .1, .5]:
                    result = {
                        "num_hidden_layers": num_hidden_layers,
                        "num_nodes_per_layer": num_nodes_per_layer,
                        "lr": lr,
                        "momentum": momentum,
                        "weight_decay": weight_decay,
                        "MSE": [],
                        "R2": [],
                        "MSPE": []
                    }
                    for train_index, val_index in kfold.split(x_train, y_train):
                        train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train[train_index]), torch.Tensor(y_train[train_index])), batch_size=64, shuffle=True)
                        val_dataset= DataLoader(TensorDataset(torch.Tensor(x_train[val_index]), torch.Tensor(y_train[val_index])), batch_size=64, shuffle=True)
                        model = NeuralNetwork(len(x_train[0]), num_hidden_layers=num_hidden_layers, num_nodes_per_layer=num_nodes_per_layer)
                        train_loop(train_dataset, model, lr=lr, momentum=momentum, weight_decay=weight_decay, early_stopping=25)
                        test_loop(val_dataset, model, result)            
                    result["MSE"] = np.mean(result["MSE"])
                    result["R2"] = np.mean(result["R2"])
                    result["MSPE"] = np.mean(result["MSPE"])
                    results.append(result)

In [6]:
results

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 1e-05,
  'MSE': 0.8046819478273392,
  'R2': 0.34763551720241426,
  'MSPE': 3.771036384119229},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.0001,
  'MSE': 1.4261133939027786,
  'R2': -0.23667380788148956,
  'MSPE': 5.350006940501808},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.001,
  'MSE': 0.7884099960327149,
  'R2': 0.3406578970787604,
  'MSPE': 3.5839888511808837},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.01,
  'MSE': 1.5571848889191944,
  'R2': -0.42851338252561905,
  'MSPE': 5.767024894329635},
 {'num_hidden_layers': 1,
  'num_nodes_per_layer': 8,
  'lr': 1e-05,
  'momentum': 0.9,
  'weight_decay': 0.1,
  'MSE': 0.7609119981527328,
  'R2': 0.37364844944431547,
  'MSPE': 3.2713576864375

In [7]:
import json
with open("nn_results.json", "wt") as f:
    json.dump(results, f)

In [8]:
# Sort by lowest MSE
sorted(results, key=lambda x: x["MSE"])

[{'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.9,
  'weight_decay': 0.0001,
  'MSE': 0.25703807870546974,
  'R2': 0.7872408391806608,
  'MSPE': 1.2496729673949458},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.001,
  'MSE': 0.2573568854480982,
  'R2': 0.7742094559465296,
  'MSPE': 1.2011611699644063},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.99,
  'weight_decay': 0.0001,
  'MSE': 0.2589205980300903,
  'R2': 0.786697878032885,
  'MSPE': 1.2418141310112645},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.99,
  'weight_decay': 1e-05,
  'MSE': 0.26158651411533357,
  'R2': 0.7831063026963083,
  'MSPE': 1.2136999431132065},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.0001,
  'MSE': 0.26192603980501494,
  'R2': 0.7840488091012527,
  'MSPE': 

In [9]:
# Sort by highest R2
sorted(results, key=lambda x: x["R2"], reverse=True)

[{'num_hidden_layers': 1,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.001,
  'MSE': 0.2659540211160977,
  'R2': 0.7875701399335296,
  'MSPE': 1.2628805374895984},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.9,
  'weight_decay': 0.0001,
  'MSE': 0.25703807870546974,
  'R2': 0.7872408391806608,
  'MSPE': 1.2496729673949458},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.001,
  'momentum': 0.99,
  'weight_decay': 0.0001,
  'MSE': 0.2589205980300903,
  'R2': 0.786697878032885,
  'MSPE': 1.2418141310112645},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 32,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 0.0001,
  'MSE': 0.26192603980501494,
  'R2': 0.7840488091012527,
  'MSPE': 1.278309210702175},
 {'num_hidden_layers': 2,
  'num_nodes_per_layer': 64,
  'lr': 0.0001,
  'momentum': 0.99,
  'weight_decay': 1e-05,
  'MSE': 0.2694628993670146,
  'R2': 0.7834700641480017,
  'MSPE': 1

As we can see the best parameters is 2 hidden layers with 64 nodes and a learning rate of 1e-3, a momentum of 0.9, and a weight decay of 1e-4.

In [11]:
train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=64, shuffle=True)
test_dataset = DataLoader(TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=64, shuffle=True)
best_model = NeuralNetwork(len(x_train[0]), 2, 64)
train_loop(train_dataset, best_model, lr=1e-3, momentum=0.9, weight_decay=1e-4, early_stopping=25)
train_results = {
    "MSE": [],
    "R2": [],
    "MSPE": []
}
test_loop(test_dataset, best_model, train_results)
print(f"Train MSE: {train_results['MSE']}")
print(f"Train R2: {train_results['R2']}")
print(f"Train MSPE: {train_results['MSPE']}")
test_result = {
    "MSE": [],
    "R2": [],
    "MSPE": []
}
test_loop(test_dataset, best_model, test_result)
print(f"Test MSE: {test_result['MSE']}")
print(f"Test R2: {test_result['R2']}")
print(f"Test MSPE: {test_result['MSPE']}")

Train MSE: [0.24206174165010452]
Train R2: [0.81868693801231]
Train MSPE: [1.2185537690917652]
Test MSE: [0.243272602558136]
Test R2: [0.8257231067682708]
Test MSPE: [1.2306270386195846]


Another option is 1 layer 64 nodes, a learning rate of 1e-4, a momentum of 0.99 and a weight decay of 1e-3

In [12]:
train_dataset = DataLoader(TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=64, shuffle=True)
test_dataset = DataLoader(TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=64, shuffle=True)
best_model = NeuralNetwork(len(x_train[0]), 1, 64)
train_loop(train_dataset, best_model, lr=1e-4, momentum=0.99, weight_decay=1e-3, early_stopping=25)
train_results = {
    "MSE": [],
    "R2": [],
    "MSPE": []
}
test_loop(test_dataset, best_model, train_results)
print(f"Train MSE: {train_results['MSE']}")
print(f"Train R2: {train_results['R2']}")
print(f"Train MSPE: {train_results['MSPE']}")
test_result = {
    "MSE": [],
    "R2": [],
    "MSPE": []
}
test_loop(test_dataset, best_model, test_result)
print(f"Test MSE: {test_result['MSE']}")
print(f"Test R2: {test_result['R2']}")
print(f"Test MSPE: {test_result['MSPE']}")

Train MSE: [0.23476113379001617]
Train R2: [0.8295247541712]
Train MSPE: [1.1758846799946494]
Test MSE: [0.2355706493059794]
Test R2: [0.830658863855672]
Test MSPE: [1.1940753966983821]
