This Pipeline is based on the computer vision lab where the functions were modified for the purpose of this lab

In [1]:
import numpy as np
import os
import pandas as pd
from scipy.stats import zscore
from sklearn.metrics import r2_score
#from sklearn.model_selection import train_test_split
from numpy.random import RandomState

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.nn.functional as F

from torch.utils.data import DataLoader
from torch.optim import Adam

In [2]:
SEED= 123456789
BATCH_SIZE = 10
NUM_WORKERS = 4
NUM_EPOCHS = 100
VALIDATION_INDEX = 750 #We will take the VALIDATION_INDEX firts samples as training and the remaining as validation

In [3]:
# Updating mean class for loss aggregation.
class UpdatingMean():
    def __init__(self):
        self.sum = 0
        self.n = 0

    def mean(self):
        return self.sum / self.n

    def add(self, loss):
        self.sum += loss
        self.n += 1

In [4]:
class MRIDataset(Dataset):
    def __init__(self, train=True):
        super().__init__()
        
        X_train_test = pd.read_csv('data/X_train_cleaned.csv')
        y_train_test = pd.read_csv('data/y_train_cleaned.csv')
        
        #Split in test and validation:
        prng = RandomState(SEED)
        train_mask = prng.rand(len(X_train_test)) < 0.8 #~80% of training and 20% of validation
                
        if(train):
            self.samples = X_train_test[train_mask].to_numpy()
            self.annotations= y_train_test[train_mask].to_numpy()
        else:
            self.samples = X_train_test[~train_mask].to_numpy()
            self.annotations= y_train_test[~train_mask].to_numpy()
                                        
    def __len__(self):
        # Returns the number of samples in the dataset.
        return self.samples.shape[0]
    
    def __getitem__(self, idx):
        # Returns the sample and annotation with index idx.
        sample = self.samples[idx]
        annotation = self.annotations[idx]
                
        # Convert to tensor.
        return {
            'input': torch.from_numpy(sample).float(),
            'annotation': torch.from_numpy(annotation).float()
        }

In [5]:
def r2_loss(output, target):
    target_mean = torch.mean(target)
    ss_tot = torch.sum((target - target_mean) ** 2)
    ss_res = torch.sum((target - output) ** 2)
    r2 = 1 - ss_res / ss_tot if ss_tot != 0 else torch.zeros(1) #What to do when divided by zero?
    return r2

In [6]:
def run_training_epoch(net, optimizer, dataloader):
    loss_aggregator = UpdatingMean()
    # Put the network in training mode.
    net.train()
    # Loop over batches.
    for batch in dataloader:
        # Reset gradients.
        optimizer.zero_grad()

        # Forward pass.
        X = batch["input"]
        y = batch["annotation"]
        output = net(X)

        # Compute the loss - MAE
        loss = nn.L1Loss()
        output_loss = loss(output, y)

        # Backwards pass.
        output_loss.backward()
        optimizer.step()

        # Save loss value in the aggregator.
        loss_aggregator.add(output_loss.detach().numpy())
    return loss_aggregator.mean()


def compute_accuracy(output, labels):
    return r2_loss(output, labels)


def run_validation_epoch(net, dataloader):
    accuracy_aggregator = UpdatingMean()
    # Put the network in evaluation mode.
    net.eval()
    # Loop over batches.
    for i, batch in enumerate(dataloader):
        # Forward pass only.
        output = net(batch['input'])

        # Compute the accuracy using compute_accuracy.
        accuracy = compute_accuracy(output.detach().numpy(), batch['annotation'])

        # Save accuracy value in the aggregator.
        accuracy_aggregator.add(accuracy.item())
    return accuracy_aggregator.mean()

In [11]:
class MLPTest(nn.Module):
    def __init__(self):
        super().__init__()

        self.codename = 'mlp'

        # Define the network layers in order.
        # Input is 726D.
        # Output is a single value.
        # Multiple linear layers each followed by a LeakyReLU non-linearity.
        self.layers = nn.Sequential(
            nn.Linear(726, 100),
            nn.LeakyReLU(),
            nn.Linear(100, 150),
            nn.LeakyReLU(),
            nn.Linear(150, 1),
            nn.LeakyReLU()
        )
    
    def forward(self, batch):
        # Process batch using the layers.
        x = self.layers(batch)
        return x

In [13]:
# Create the training dataset and dataloader.
train_dataset = MRIDataset(train=True)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=True
)
# Create the validation dataset and dataloader.
valid_dataset = MRIDataset(train=False)
valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS
)

# Create the network.
net = MLPTest()

# Create the optimizer.
optimizer = Adam(net.parameters(), lr=0.005)

# Main training loop.
best_accuracy = 0
for epoch_idx in range(NUM_EPOCHS):
    # Training code.
    loss = run_training_epoch(net, optimizer, train_dataloader)
    print('[Epoch %02d] Loss: %.4f' % (epoch_idx + 1, loss))

    # Validation code.
    acc = run_validation_epoch(net, valid_dataloader)
    print('[Epoch %02d] r2_score.: %.4f' % (epoch_idx + 1, acc))

    # Save checkpoint if accuracy is the best so far.
    checkpoint = {
        'epoch_idx': epoch_idx,
        'net': net.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    if acc > best_accuracy:
        best_accuracy = acc
        torch.save(checkpoint, f'best-{net.codename}.pth')


[Epoch 01] Loss: 34.8019
[Epoch 01] r2_score.: -15.3883
[Epoch 02] Loss: 22.0019
[Epoch 02] r2_score.: -14.0344
[Epoch 03] Loss: 16.7106
[Epoch 03] r2_score.: -9.1285
[Epoch 04] Loss: 14.1468
[Epoch 04] r2_score.: -8.6176
[Epoch 05] Loss: 11.5266
[Epoch 05] r2_score.: -7.9749
[Epoch 06] Loss: 10.8761
[Epoch 06] r2_score.: -6.8809
[Epoch 07] Loss: 9.5303
[Epoch 07] r2_score.: -6.4303
[Epoch 08] Loss: 9.7316
[Epoch 08] r2_score.: -7.7254
[Epoch 09] Loss: 9.9943
[Epoch 09] r2_score.: -8.0056
[Epoch 10] Loss: 9.0102
[Epoch 10] r2_score.: -7.2296
[Epoch 11] Loss: 8.0100
[Epoch 11] r2_score.: -6.9948
[Epoch 12] Loss: 7.9675
[Epoch 12] r2_score.: -7.3746
[Epoch 13] Loss: 7.9124
[Epoch 13] r2_score.: -5.8719
[Epoch 14] Loss: 7.2796
[Epoch 14] r2_score.: -6.3524
[Epoch 15] Loss: 7.3535
[Epoch 15] r2_score.: -7.1675
[Epoch 16] Loss: 6.8215
[Epoch 16] r2_score.: -5.3834
[Epoch 17] Loss: 6.7409
[Epoch 17] r2_score.: -5.9551
[Epoch 18] Loss: 6.7115
[Epoch 18] r2_score.: -8.4022
[Epoch 19] Loss: 6.9