In [1]:
## Useful libraries
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import os
import copy
import pickle
from urllib.request import urlretrieve
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from sklearn.preprocessing import MinMaxScaler
from matplotlib.colors import TwoSlopeNorm
import os.path
import glob
import pathlib

from cycler import cycler
import seaborn as sns

# Set the color scheme
sns.set_theme()
colors = ['#0076C2', '#EC6842', '#A50034', '#009B77', '#FFB81C', '#E03C31', '#6CC24A', '#EF60A3', '#0C2340', '#00B8C8', '#6F1D77']
plt.rcParams['axes.prop_cycle'] = cycler(color=colors)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model (Copy from FAT workshop)

Your main task throughout this notebook will be to define a UNET-based CNN architecture.
You are free to design the architecture as you prefer, as long as the model does its purpose and produces decent results.

In [3]:
# Create you own CNN model

# Define the model
# model = ...

# ---------------------- student exercise --------------------------------- #
class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, bias=False, batch_norm=True):
        super().__init__()
        
        layers = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding, bias=bias)]
        if batch_norm:
            layers.append(nn.BatchNorm2d(num_features=out_channels))
        layers.append(nn.PReLU())
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, padding=padding, bias=bias))
                
        self.cnnblock = nn.Sequential(*layers)

    def forward(self, x):
        return self.cnnblock(x)

class Encoder(nn.Module):
    def __init__(self, channels=[32, 64, 128], kernel_size=3, padding=1, bias=False, batch_norm=True):
        super().__init__()

        self.enc_blocks = nn.ModuleList([
            CNNBlock(channels[block], channels[block+1], kernel_size, padding, bias, 
                     batch_norm=batch_norm) 
            for block in range(len(channels)-1)]
            )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
    def forward(self, x):
        outs = []
        for block in self.enc_blocks:
            x = block(x)
            outs.append(x)
            x = self.pool(x)
        return outs

class Decoder(nn.Module):
    def __init__(self, channels=[128, 64, 32], kernel_size=3, padding=1, bias=False, batch_norm=True):
        super().__init__()
        self.channels = channels
        self.upconvs = nn.ModuleList([
            nn.ConvTranspose2d(channels[block], channels[block+1], kernel_size=2, padding=0, stride=2) 
            for block in range(len(channels)-1)]
            )
        self.dec_blocks = nn.ModuleList([
            CNNBlock(channels[block], channels[block+1], kernel_size, padding, bias, 
                     batch_norm=batch_norm)
             for block in range(len(channels)-1)]
             )
        
    def forward(self, x, x_skips):
        for i in range(len(x_skips)):
            x = self.upconvs[i](x)
            x = torch.cat((x, x_skips[-(1+i)]), dim=1)
            x = self.dec_blocks[i](x)

        x = self.dec_blocks[-1](x)
        return x
    
class CNN(nn.Module):
    def __init__(self, node_features, out_dim=1, n_downsamples=3, initial_hid_dim=64, batch_norm=True, 
                 bias=True):
        super(CNN, self).__init__()
        hidden_channels = [initial_hid_dim*2**i for i in range(n_downsamples)]
        encoder_channels = [node_features]+hidden_channels
        decoder_channels = list(reversed(hidden_channels))+[out_dim]

        self.encoder = Encoder(encoder_channels, kernel_size=3, padding=1, 
                               bias=bias, batch_norm=batch_norm)
        self.decoder = Decoder(decoder_channels, kernel_size=3, padding=1, 
                               bias=bias, batch_norm=batch_norm)
        
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x[-1], x[:-1])
        x = nn.Sigmoid()(x)
        return x
# ---------------------- student exercise --------------------------------- #

In [17]:
node_features = 3 # WD, VX, VY
model = CNN(node_features=node_features, n_downsamples=4, initial_hid_dim=32, 
            batch_norm=True, bias=True)

# Data Loading

Loading the normalized data from the folder.

In [21]:
path = pathlib.Path().resolve()
proj_dir = str(path.parent)

path_tra = proj_dir + "\data/processed_data/normalized_training_data"
path_val = proj_dir + "\data/processed_data/normalized_validation_data"
path_tst = proj_dir + "\data/processed_data/normalized_test_data"

# Load training data
wd_train = glob.glob(path_tra + '/WD/*')
wd_tra = []
[wd_tra.append(np.loadtxt(wd_train[i])) for i in range(len(wd_train))]

# Load Validation data
wd_vald = glob.glob(path_val + '/WD/*')
wd_val = []
[wd_val.append(np.loadtxt(wd_vald[i])) for i in range(len(wd_vald))]
    

# Check if we load the data
print(np.shape(wd_tra))
print(type(wd_tra))
print(np.shape(wd_val))
print(type(wd_val))

MemoryError: Unable to allocate 849. MiB for an array with shape (280, 4096, 97) and data type float64

# Training

Define the training and evaluation functions needed to update the model's parameters.

Tip: you can use the same training and evaluation function we have used so far.

In [18]:
def train_epoch(model, loader, optimizer, device='cpu'):
    model.to(device)
    model.train() # specifies that the model is in training mode

    losses = []

    for batch in loader:
        x = batch[0]
        y = batch[1]

        # Model prediction
        preds = model(x)
        
        # MSE loss function
        loss = nn.MSELoss()(preds, y)
        
        losses.append(loss.cpu().detach())
        
        # Backpropagate and update weights
        loss.backward()   # compute the gradients using backpropagation
        optimizer.step()  # update the weights with the optimizer
        optimizer.zero_grad(set_to_none=True)   # reset the computed gradients

    losses = np.array(losses).mean()

    return losses

In [19]:
def evaluation(model, loader, device='cpu'):
    model.to(device)
    model.eval() # specifies that the model is in evaluation mode
    
    losses = []
    
    with torch.no_grad():
        for batch in loader:
            x = batch[0]
            y = batch[1]

            # Model prediction
            preds = model(x)

            # MSE loss function
            loss = nn.MSELoss()(preds, y)
            losses.append(loss.cpu().detach())

    losses = np.array(losses).mean()

    return losses

### Define the training paramters, the optimizer, and the dataloader

In [20]:
# Set training parameters
learning_rate = 0.001
batch_size = 64
num_epochs = 10

# Create the optimizer to train the neural network via back-propagation
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Create the training and validation dataloaders to "feed" data to the model in batches
train_loader = DataLoader(wd_tra, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(wd_val, batch_size=batch_size, shuffle=False)
# test_loader = DataLoader(normalized_test_dataset, batch_size=batch_size, shuffle=False)

NameError: name 'wd_val' is not defined

## Training and validating

And finally, it's time to train you model, with many epochs.

Make sure to train for enough epochs not to end training preemptively.

Remember to save your training and validation losses to check if your model is training properly.

In [None]:
# ---------------------- student exercise --------------------------------- #
#create vectors for the training and validation loss
train_losses = []
val_losses = []

for epoch in range(1, num_epochs+1):
    # Model training
    train_loss = train_epoch(model, train_loader, optimizer, device=device)

    # Model validation
    val_loss = evaluation(model, val_loader, device=device)

    if epoch == 1:
        best_loss = val_loss
    
    if val_loss<=best_loss:
        best_model = copy.deepcopy(model)
        best_loss = val_loss
        best_epoch = epoch

    train_losses.append(train_loss)
    val_losses.append(val_loss)

    if epoch%10 == 0:
        print("epoch:",epoch, "\t training loss:", np.round(train_loss,4),
                            "\t validation loss:", np.round(val_loss,4))
        
model = copy.deepcopy(best_model)
# ---------------------- student exercise --------------------------------- #

In [None]:
test_loss = evaluation(model, test_loader, device=device)
print(test_loss)

## Losses

Let's check if your training and validation losses are decreasing with the epochs

In [None]:
plt.plot(train_losses, label='Training')
plt.plot(val_losses, label='Validation')
plt.yscale('log')
plt.title('Losses')
plt.xlabel('Epochs')
plt.legend()
plt.show()