In [9]:
import sys
import pathlib
import time

import matplotlib.pyplot as plt
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Local
cwd = pathlib.Path().resolve()
src = cwd.parent
root = src.parent
sys.path.append(str(src))

from utils.watertopo import WaterTopo
from utils.simulation import Simulation
from utils.utils import count_parameters, create_sequence
from training.train import train_and_validate, evaluate_model
from models.convlstm import ConvLSTMCell

In [10]:
#initialize GPU -  In case of windows use cuda instead of nps
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Is CUDA enabled?",torch.cuda.is_available())
print("Number of GPUs",torch.cuda.device_count())

Is CUDA enabled? False
Number of GPUs 0


In [39]:
class Encoder(nn.Module):
    def __init__(self, channels=[32, 64, 128], kernel_size=3, bias=False):
        super().__init__()

        self.cell_list = [
            ConvLSTMCell(channels[block], channels[block+1], kernel_size, bias)
            for block in range(len(channels)-1)]
        
        self.enc_blocks = nn.ModuleList(self.cell_list)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
    def forward(self, x, seq_len, start_state):

        cur_layer_input = x

        for i, block in enumerate(self.enc_blocks):
            h, c = start_state[i]
            output_inner = []

            for t in range(seq_len):
                h,c = block(cur_layer_input[:, t, :, :, :], cur_state=[h, c])
                output_inner.append(h)

            layer_output = torch.stack(output_inner, dim=1)
            cur_layer_input = layer_output
            
        return layer_output # , [h, c]
    
    
class Decoder(nn.Module):
    def __init__(self, channels=[128, 64, 32], kernel_size=3, bias=False):
        super().__init__()

        self.cell_list = [
            ConvLSTMCell(channels[block], channels[block+1], kernel_size, bias)
            for block in range(len(channels)-1)]

        self.dec_blocks = nn.ModuleList(self.cell_list)
    
    def forward(self, x, seq_len, start_state):

        cur_layer_input = x

        for i, block in enumerate(self.dec_blocks):
            h, c = start_state[i]
            output_inner = []

            for t in range(seq_len):
                h,c = block(cur_layer_input[:, t, :, :, :], cur_state=[h, c])
                output_inner.append(h)

            layer_output = torch.stack(output_inner, dim=1)
            cur_layer_input = layer_output
            
        return layer_output  #, [h, c]


class ConvLSTM(nn.Module):
    def __init__(self, input_channels, hidden_channels, output_channels, kernel_size, bias=True):
        super(ConvLSTM, self).__init__()
        
        encoder_channels = [input_channels]+hidden_channels
        decoder_channels = list(reversed(hidden_channels))+[output_channels]

        self.encoder = Encoder(encoder_channels, kernel_size=kernel_size, bias=bias)
        self.decoder = Decoder(decoder_channels, kernel_size=kernel_size, bias=bias)
                               
    def forward(self, x, hidden_state = None):

        batch_size, seq_len, channels, height, width = x.size()


        if hidden_state is None:
            hidden_state = self._init_hidden(batch_size=batch_size,
                                             image_size=(height, width))
            enc_hidden_state, dec_hidden_state = hidden_state

        x = self.encoder(x, seq_len, enc_hidden_state)
        x = self.decoder(x, seq_len, dec_hidden_state)
        
        return x
    
    def _init_hidden(self, batch_size, image_size):
        enc = []
        dec = []

        for i in range(len(self.encoder.cell_list)):
            enc.append(self.encoder.cell_list[i].init_hidden(batch_size, image_size))
            dec.append(self.encoder.cell_list[i].init_hidden(batch_size, image_size))
        
        init_states = enc, dec
        return init_states

In [40]:
model = ConvLSTM(input_channels=2, 
                 hidden_channels=[32,64], 
                 output_channels=1, 
                 kernel_size=[3,3], 
                 bias=True)

print(f"ConvLSTM --> num. trainable parameters:{count_parameters(model):8d}")

ConvLSTM --> num. trainable parameters:  372648


In [41]:
def train_lstm(model,
               channels=2,
              sim_amount=3,
              training_size=0.8,
              batch_size=4,
              T=5,
              H=1,
              num_epochs = 200,
              lr = 0.0005,
              criterion = nn.MSELoss(),
              optimizer = optim.AdamW,
              device = device,
              model_name = 'model_lstm'):
    
    """
    This method trains a simple RNN. Given a single timestep consisting of water depth and topography (both 64*64), the RNN predicts a single step ahead. The best model state is
    saved following the save_path, and also returned by the method.
    
    Description of arguments:
    - model: the model to be trained, should be an instance of the class SimpleRNN;
    - sim_amount (int): number of simulations of which the data is loaded and used for training, with a maximum of 400;
    - training_size (float): fraction of data to use for training (validation uses the fraction 1 - training_size);
    - batch_size (int): batch size used during training (you can modify this based on your requirements);
    - num_epochs (int): number of epochs used during training;
    - lr (float): learning rate used during training;
    - criterion: Loss function, default nn.MSELoss()
    - optimizer: optimizer used for training, default optim.AdamW
    - model_name (string): the best model state will be saved in ../results/trained_models/ under this name

    returns: model, train_losses, val_losses, best_val_loss, time
    """
    # load simulations to be used for training
    if channels == 2:
        sims = WaterTopo.load_simulations(str(root)+"/data/normalized_data/tra_val", sim_amount=sim_amount, number_grids=64)
    elif channels == 4:
        sims = Simulation.load_simulations(str(root)+"/data/normalized_data/tra_val", sim_amount=sim_amount, number_grids=64)

    X, Y = create_sequence(sims, T, H)

    # We keep track of indexes of train and validation.
    X_tra, X_tst, Y_tra, Y_tst, ix_tra, ix_tst = train_test_split(
        X, Y, np.arange(X.shape[0]), test_size=1-training_size, shuffle=True, random_state=42)
    
    # Split the existing test dataset into validation and test sets (50/50 split)
    X_val, X_tst, Y_val, Y_tst, ix_val, ix_tst = train_test_split(
        X_tst, Y_tst, ix_tst, test_size=0.5, shuffle=True, random_state=42)
    
    
    #create datasets and data loaders
    train_dataset = TensorDataset(torch.tensor(X_tra, dtype=torch.float32), torch.tensor(Y_tra, dtype=torch.float32))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(Y_val, dtype=torch.float32))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # defining the optimizer
    optimizer = optimizer(model.parameters(), lr=lr)

    # defining the save path
    save_path = "../results/trained_models/" + model_name

    # training
    train_losses, val_losses, best_val_loss, time = train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, save_path)

    # Load the best model
    model.load_state_dict(torch.load(save_path))

    return model, train_losses, val_losses, best_val_loss, time

In [42]:
model, train_losses, bast_val_loss, time = train_lstm(model,
                                                    sim_amount=10,
                                                    training_size=0.8,
                                                    batch_size=4,
                                                    num_epochs = 50,
                                                    lr = 0.0005,
                                                    criterion = nn.MSELoss(),
                                                    optimizer = optim.AdamW,
                                                    device = device,
                                                    model_name = 'convlstm')

0
1
2
3
4
0


RuntimeError: Given groups=1, weight of size [4, 33, 3, 3], expected input[4, 96, 64, 64] to have 33 channels, but got 96 channels instead

In [4]:
def train_lstm(model,
              sim_amount=1,
              training_size=0.8,
              batch_size=4,
              num_epochs = 200,
              lr = 0.0005,
              criterion = nn.MSELoss(),
              optimizer = optim.AdamW,
              model_name = 'babie_first_RNN'):
    
    """
    This method trains a simple RNN. Given a single timestep consisting of water depth and topography (both 64*64), the RNN predicts a single step ahead. The best model state is
    saved following the save_path, and also returned by the method.
    
    Description of arguments:
    - model: the model to be trained, should be an instance of the class SimpleRNN;
    - sim_amount (int): number of simulations of which the data is loaded and used for training, with a maximum of 400;
    - training_size (float): fraction of data to use for training (validation uses the fraction 1 - training_size);
    - batch_size (int): batch size used during training (you can modify this based on your requirements);
    - num_epochs (int): number of epochs used during training;
    - lr (float): learning rate used during training;
    - criterion: Loss function, default nn.MSELoss()
    - optimizer: optimizer used for training, default optim.AdamW
    - model_name (string): the best model state will be saved in ../results/trained_models/ under this name

    returns: model, train_losses, val_losses, best_val_loss, time
    """
    # load simulations to be used for training
    sims = Simulation.load_simulations(str(root)+"/data/processed_data/normalized_training_data", sim_amount=sim_amount, number_grids=64)

    n_timesteps = 96
    grid_size = 64
    channels = 2   # water depth and topography

    # reformat the data
    X = np.zeros((len(sims), n_timesteps, channels, grid_size, grid_size))   # timestep * grid_x * grid_y * channels
    Y = np.zeros(X[:,:,0,:,:].shape)   # timestep * grid_x * grid_y

    for i in range(len(sims)):   # number of simulations loaded in for training/validation

        sim = sims[i]            # get simulation
        topography = sim.topography

        for t_i in range(n_timesteps):    # number of timesteps

            wd, vx, vy = sim.return_timestep(t_i)
            X[i, t_i, 0, :, :] = wd
            X[i, t_i, 1, :, :] = topography

            wd, vx, vy = sim.return_timestep(t_i+1)
            Y[i, t_i, :, :] = wd

    # split the data into training and validation
    id_training = int(training_size * len(X))

    X_tra = X[:id_training, :]
    Y_tra = Y[:id_training, :]

    X_val = X[id_training:, :]
    Y_val = Y[id_training:, :]

    print("X_tra.shape: ", X_tra.shape)
    print("Y_tra.shape: ", Y_tra.shape)
    print("X_val.shape: ", X_val.shape)
    print("Y_val.shape: ", Y_val.shape)

    #create datasets and data loaders
    train_dataset = TensorDataset(torch.tensor(X_tra, dtype=torch.float32), torch.tensor(Y_tra, dtype=torch.float32))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(Y_val, dtype=torch.float32))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # defining the optimizer
    optimizer = optimizer(model.parameters(), lr=lr)

    # defining the save path
    save_path = "../results/trained_models/" + model_name

    # training
    train_losses, val_losses, best_val_loss, time = train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, save_path)

    # Load the best model
    model.load_state_dict(torch.load(save_path))

    return model, train_losses, val_losses, best_val_loss, time

In [5]:
model, train_losses, bast_val_loss, time = train_lstm(model,
                                                    sim_amount=10,
                                                    training_size=0.8,
                                                    batch_size=4,
                                                    num_epochs = 50,
                                                    lr = 0.0005,
                                                    criterion = nn.MSELoss(),
                                                    optimizer = optim.AdamW,
                                                    model_name = 'convlstm')

X_tra.shape:  (8, 96, 2, 64, 64)
Y_tra.shape:  (8, 96, 64, 64)
X_val.shape:  (2, 96, 2, 64, 64)
Y_val.shape:  (2, 96, 64, 64)
[tensor([[[[[-3.0587e-04, -2.3024e-04, -2.3213e-04,  ..., -2.3215e-04,
            -2.2816e-04, -1.9540e-04],
           [-4.0259e-04, -2.6362e-04, -2.4893e-04,  ..., -2.4897e-04,
            -2.3753e-04, -3.5279e-04],
           [-4.1805e-04, -2.7951e-04, -2.6072e-04,  ..., -2.6076e-04,
            -2.4952e-04, -3.5388e-04],
           ...,
           [-4.1801e-04, -2.7949e-04, -2.6072e-04,  ..., -2.6077e-04,
            -2.4956e-04, -3.5387e-04],
           [-4.0485e-04, -2.6847e-04, -2.4926e-04,  ..., -2.4938e-04,
            -2.4088e-04, -3.5269e-04],
           [-3.0286e-04, -2.4132e-04, -2.2226e-04,  ..., -2.2235e-04,
            -2.2578e-04, -3.7496e-04]],

          [[-2.2780e-03, -2.3192e-03, -2.3156e-03,  ..., -2.3156e-03,
            -2.3182e-03, -2.3474e-03],
           [-2.4101e-03, -2.4286e-03, -2.4367e-03,  ..., -2.4367e-03,
            -2.4295e-0

AttributeError: 'tuple' object has no attribute 'size'