In [1]:
# Import libraries
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from sklearn import preprocessing
import datetime

In [2]:
# Define util to load .npy files
class Utils:
    @staticmethod
    def npy_to_tensor(path, preprocess=False):
        if preprocess:
            # Standardization of the data
            npy_data = np.load(path)
            scaler = preprocessing.StandardScaler().fit(npy_data)
            preprocessed_data = scaler.transform(npy_data)
            print(f"Mean: {preprocessed_data.mean(axis=0)} Standard deviation: {preprocessed_data.std(axis=0)}")
            return torch.from_numpy(preprocessed_data)
        else:
            return torch.from_numpy( np.load(path) )

In [3]:
# Load dataset separately
train_X = Utils.npy_to_tensor("workdir/x_train_PE.npy", preprocess=True)
train_y = Utils.npy_to_tensor("workdir/y_train_PE.npy")

test_X = Utils.npy_to_tensor("workdir/x_test_PE.npy", preprocess=True)
test_y = Utils.npy_to_tensor("workdir/y_test_PE.npy")

Mean: [-5.67566069e-15  2.37129827e-15 -2.25004104e-16 ... -1.35894422e-14
 -4.40751553e-15  1.24865357e-14] Standard deviation: [1. 1. 1. ... 1. 1. 1.]
Mean: [ 1.33062529e-15  1.43343588e-15 -8.49747623e-16 ... -5.83285372e-16
 -5.39614478e-15  1.49145160e-15] Standard deviation: [1. 1. 1. ... 1. 1. 1.]


In [4]:
# Join features and targets into one tensor dataset
train = TensorDataset(train_X, train_y)
test = TensorDataset(test_X, test_y)

In [5]:
# Create data loader for both training and testing data
train_dataloader = DataLoader(train, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test)

In [6]:
# The metric for this task is Mean Absolute Error
class Metrics:
    @staticmethod
    def mae(y_pred, y_true):
        return np.mean( np.abs( np.subtract(y_pred, y_true) ) )

In [7]:
# Define our MLP-NN.
class MLPNN(nn.Module):
    def __init__(self):
        super(MLPNN, self).__init__()

        self.layers = nn.Sequential(
            nn.Linear(1307, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
        )

    def forward(self, x):
        return self.layers(x)

In [8]:
# Declare our MLP-NN
model = MLPNN()
print(model)

MLPNN(
  (layers): Sequential(
    (0): Linear(in_features=1307, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=2, bias=True)
  )
)


In [9]:
# Select loss function (criterion) and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [10]:
# Train the model
start = datetime.datetime.now()
for epoch in range(100):
    train_loss = 0
    for features, targets in train_dataloader:
        optimizer.zero_grad()

        predictions = model( features.float() )

        loss = criterion( predictions.float(), targets.float() )
        loss.backward()

        optimizer.step()

        train_loss += loss.item()
        
end = datetime.datetime.now()
training_time = end - start
print(f"Training time: {training_time}")

Training time: 0:00:16.908762


In [11]:
# Perform the predictions. Note: we convert the tensors to standard lists - it can be changed
y_pred = []
y_true = []
with torch.no_grad():
    for features, targets in test_dataloader:
        predictions = model( features.float() )

        y_pred.append( predictions.tolist() )
        y_true.append( targets.tolist() )

In [12]:
# Calculate Mean Absolute Error
mae = Metrics.mae(y_pred, y_true)
print("MAE:", mae)

MAE: 0.025376465342498038


In [13]:
# Just for comparing some values
y_pred = np.around(y_pred, 3).tolist()
print("Predictions:", y_pred[:5])
print("Ground truth:", y_true[:5])

Predictions: [[[0.735, 0.266]], [[0.34, 0.693]], [[0.658, 0.361]], [[0.848, 0.168]], [[0.487, 0.529]]]
Ground truth: [[[0.776, 0.224]], [[0.324, 0.676]], [[0.682, 0.318]], [[0.851, 0.149]], [[0.5, 0.5]]]
