In [1]:
import copy

from sklearn.linear_model import LinearRegression
import numpy as np
from typing import Callable
import torch
import torch.nn as nn
import torch.nn.functional as F
import MyNeuralNetwork_New


### PyTorch network class for comparison with our network

In [147]:
# pyTorch neural network class

class myPytorchNetwork(nn.Module):
    def __init__(self, layers, activation):
        super(myPytorchNetwork, self).__init__()
        self.activation = activation
        self.fc_layers = nn.ModuleList([nn.Linear(layers[i], layers[i + 1]) for i in range(len(layers) - 1)])

    def forward(self, x):
        x = torch.flatten(x, 1)

        # TODO: I'm using relus everywhere for now, we need to change it to be adjustable
        for layer in self.fc_layers[:-1]:
            if self.activation == 'linear':
                x = layer(x)
            elif self.activation == 'relu':
                x = F.relu(layer(x))
            elif self.activation == 'sigmoid':
                x = torch.sigmoid(layer(x))
            elif self.activation == 'tanh':
                x = torch.tanh(layer(x))
            else:
                raise ValueError(f"Activation funtion {self.activation} is invalid")


        # No need for activation in the last layer
        return self.fc_layers[-1](x)


def train_torch_network(network: myPytorchNetwork, dataset, max_epochs, batch_size, train_ratio=0.7,
                        learning_rate=0.001, momentum=0.9, silent=False):
    loss_fn = torch.nn.MSELoss()
    # TODO: think of making the learning rate adaptive here, e.g. by using pytorch LR scheduler
    optimizer = torch.optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
    split_index = int(dataset.shape[0] * train_ratio)
    train, validation = dataset[:split_index, :], torch.from_numpy(dataset[split_index:, :])

    best_validation_loss = float("inf")

    for epoch in range(max_epochs):
        np.random.shuffle(train)
        torch_train = torch.from_numpy(train)
        batch_start_idx = 0

        batches_loss = []
        # This way we skip last samples if there are less than batch_size of them
        while batch_start_idx + batch_size <= torch_train.shape[0]:
            optimizer.zero_grad()
            outputs = network(torch_train[batch_start_idx:batch_start_idx + batch_size, :-1])
            loss = loss_fn(torch.flatten(outputs), torch_train[batch_start_idx:batch_start_idx + batch_size, -1])
            batches_loss.append(loss.item())

            loss.backward()

            optimizer.step()
            batch_start_idx += batch_size

        # Now, check the loss on validation dataset
        validation_output = torch.flatten(network(validation[:, :-1]))
        validation_loss = loss_fn(validation_output, validation[:, -1])

        best_validation_loss = validation_loss

        if not silent:
            print(
                f"Epoch: {epoch}. \nLoss on training: {np.mean(batches_loss)} \nLoss on validation: {validation_loss} \n##########")


### Helper functions to run each model multiple times and gathering statistics

In [233]:
def run_my_neural_network(network_parameters, train_dataset, test_inputs) -> np.array:
    """
    Train our neural network and predict data from test_inputs
    """
    network = MyNeuralNetwork_New.MyNeuralNetwork(*network_parameters)
    network.fit(train_dataset[:, :-1], train_dataset[:, [-1]], batch_size=10)
    prediction = network.predict(test_inputs)
    return prediction



def run_linear_regression(train_dataset, test_inputs) -> np.array:
    """
    Train a linear regression on train_inputs, and predict data for test_inputs
    """
    reg = LinearRegression().fit(train_dataset[:, :-1], train_dataset[:, -1])
    prediction = reg.predict(test_inputs)
    return prediction


def run_pytorch_network(network_parameters, train_dataset, test_inputs) -> np.array:
    """
    Train a pytorch network and predict outputs for test_inputs
    """
    network = myPytorchNetwork(*network_parameters)
    train_torch_network(network, train_dataset, 30, 10, learning_rate=0.01, silent=True)
    prediction = network(torch.from_numpy(test_inputs)).detach().numpy().flatten()
    return prediction


def measure_model_error_multiple_times(model: Callable, dataset: np.array, train_ratio=0.85, num_runs=5, silent=False):
    """
    :param model: a callable that accepts train dataset, test inputs, and produces the prediction for test inputs
    :param dataset: dataset to train on of shape (n_samples, n_features), where the last column is the value to be predicted
    :param train_ratio: how much data to put into the training dataset
    :param num_runs: number of runs with reshuffled dataset
    :param silent: if true, not print any output
    :return: mean MSE through all the runs
    """
    split_index = int(dataset.shape[0] * train_ratio)
    best_mape = 0
    best_mse = float("inf")
    for _ in range(num_runs):
        np.random.shuffle(dataset)
        train = dataset[:split_index, :]
        test = dataset[split_index:, :]

        prediction = model(train, test[:, :-1]).flatten()
        expected = test[:, -1].flatten()
        mse = np.mean((prediction - expected) ** 2)
        mape = 100 * np.mean(np.abs((prediction - expected) / expected))
        if not silent:
            print(f"Model MSE on test: {mse}")

        if mse < best_mse:
            best_mse = mse
            best_mape = mape

    return best_mape, best_mse

In [284]:
# Load the data
turbine = np.genfromtxt("processed_datasets/turbine.csv", dtype=np.float32, delimiter=',', skip_header=1)
synthetic = np.genfromtxt("processed_datasets/synthetic.csv", dtype=np.float32, delimiter=',', skip_header=1)
boston = np.genfromtxt("processed_datasets/boston.csv", dtype=np.float32, delimiter=',', skip_header=1)
turbine_input, turbine_output = turbine[:, :4], turbine[:, 4]

#### Just trying multiple times with different parameters to make sure that everything works

In [5]:
# Trying a couple of times with different splits into training and test data for a better understanging
measure_model_error_multiple_times(run_linear_regression, turbine)

Model MSE on test: 0.0013430756516754627
Model MSE on test: 0.0009350292384624481
Model MSE on test: 0.0014231828972697258
Model MSE on test: 0.0010630425531417131
Model MSE on test: 0.0010568056022748351


0.001164227188564837

In [6]:
# Now trying, maybe applying some functions to features can make the situation better. There is no obvious relation here, but after trying, we found this gives better results (not always, but mostly better)

modified_turbine = turbine.copy()
modified_turbine[:, 1] = modified_turbine[:, 1] ** 2
modified_turbine[:, 0] = modified_turbine[:, 0] ** 2
measure_model_error_multiple_times(run_linear_regression, modified_turbine)

Model MSE on test: 0.0008462416590191424
Model MSE on test: 0.0014376954641193151
Model MSE on test: 0.0013477603206411004
Model MSE on test: 0.0009092555264942348
Model MSE on test: 0.0007819987367838621


0.001064590341411531

In [179]:
measure_model_error_multiple_times(lambda *args: run_pytorch_network([[9, 5, 1], 'tanh'], *args), synthetic, num_runs=2)

Model MSE on test: 0.0012404868612065911
Model MSE on test: 0.0012759158853441477


8.158766105771065

In [180]:
measure_model_error_multiple_times(lambda *args: run_my_neural_network([[9, 5, 1], 50, 0.01, 0.9, "tanh", 0.2], *args), synthetic, num_runs=5)

Model MSE on test: 0.0014333083615438412
Model MSE on test: 0.0010935233376317352
Model MSE on test: 0.0010191006833360889
Model MSE on test: 0.001083210922432468
Model MSE on test: 0.0018946499250184826


7.382092207193549

## Part 3.1

In [285]:
test_parameters = [
    # ["turbine", 3, [4, 5, 1], 30, 0.01, 0.9, "relu"],
    # ["turbine", 3, [4, 5, 1], 30, 0.01, 0.9, "sigmoid"],
    # ["turbine", 3, [4, 5, 1], 30, 0.01, 0.9, "tanh"],
    # ["turbine", 3, [4, 5, 1], 30, 0.01, 0.9, "linear"],
    # ["synthetic", 4, [9, 8, 6, 1], 30, 0.01, 0.9, "relu"],
    # ["synthetic", 4, [9, 8, 6, 1], 30, 0.01, 0.9, "sigmoid"],
    # ["synthetic", 4, [9, 8, 6, 1], 30, 0.01, 0.9, "tanh"],
    # ["synthetic", 4, [9, 8, 6, 1], 30, 0.01, 0.9, "linear"],
    ["boston", 4, [13, 8, 6, 1], 50, 0.001, 0.9, "relu"],
    ["boston", 4, [13, 8, 6, 1], 50, 0.01, 0.9, "relu"],
    ["boston", 4, [13, 8, 6, 1], 70, 0.001, 0.9, "tanh"],
    ["boston", 4, [13, 8, 6, 1], 50, 0.01, 0.9, "tanh"],
]

np.random.seed(10)

# The generated output here can be just copied directly to LaTeX table
for parameters in test_parameters:
    dataset, n_layers, layers, epochs, lr, momentum, activation = parameters
    mape, mse = measure_model_error_multiple_times(lambda *args: run_my_neural_network([layers, epochs, lr, momentum, activation, 0.01], *args), eval(dataset), num_runs=5, silent=True)
    print(" & ".join(map(str, parameters)) + " & {:.1f} & {:.3f} \\\\\n\\hline".format(mape, mse))

boston & 4 & [13, 8, 6, 1] & 50 & 0.001 & 0.9 & relu & 17.2 & 0.007 \\
\hline
boston & 4 & [13, 8, 6, 1] & 50 & 0.01 & 0.9 & relu & 17.1 & 0.004 \\
\hline
boston & 4 & [13, 8, 6, 1] & 70 & 0.001 & 0.9 & tanh & 14.1 & 0.006 \\
\hline
boston & 4 & [13, 8, 6, 1] & 50 & 0.01 & 0.9 & tanh & 12.2 & 0.003 \\
\hline


## Part 3.2
