In [2]:
import sys
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Local
cwd = pathlib.Path().resolve()
src = cwd.parent
root = src.parent
sys.path.append(str(src))

from utils.utils import count_parameters, create_sequence
from utils.train_lstm import train_and_validate
from utils.watertopo import WaterTopo

In [3]:
#initialize GPU -  In case of windows use cuda instead of nps
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Is CUDA enabled?",torch.cuda.is_available())
print("Number of GPUs",torch.cuda.device_count())

Is CUDA enabled? False
Number of GPUs 0


In [4]:
class ConvLSTMCell(nn.Module):

    def __init__(self, input_dim, hidden_dim, kernel_size, bias):
        """
        Initialize ConvLSTM cell.

        Parameters
        ----------
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        kernel_size: (int, int)
            Size of the convolutional kernel.
        bias: bool
            Whether or not to add the bias.
        """

        super(ConvLSTMCell, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.kernel_size = kernel_size
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias

        self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
                              out_channels=4 * self.hidden_dim,
                              kernel_size=self.kernel_size,
                              padding=self.padding,
                              bias=self.bias)

    def forward(self, input_tensor, cur_state):
        h_cur, c_cur = cur_state

        combined = torch.cat([input_tensor, h_cur], dim=1)  # concatenate along channel axis

        combined_conv = self.conv(combined)
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)

        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        height, width = image_size
        return (torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device),
                torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device))
    

class Encoder(nn.Module):
    def __init__(self, channels=[32, 64, 128], kernel_size=3, bias=False):
        super().__init__()

        self.cell_list = [
            ConvLSTMCell(channels[block], channels[block+1], kernel_size, bias)
            for block in range(len(channels)-1)]
        
        self.enc_blocks = nn.ModuleList(self.cell_list)
        # self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
    def forward(self, x, seq_len, start_state):

        cur_layer_input = x

        for i, block in enumerate(self.enc_blocks):
            h, c = start_state[i]
            output_inner = []

            for t in range(seq_len):
                h,c = block(cur_layer_input[:, t, :, :, :], cur_state=[h, c])
                output_inner.append(h)

            layer_output = torch.stack(output_inner, dim=1)
            cur_layer_input = layer_output
            
        return layer_output # , [h, c]
    
    
class Decoder(nn.Module):
    def __init__(self, channels=[128, 64, 32], kernel_size=3, bias=False):
        super().__init__()

        self.cell_list = [
            ConvLSTMCell(channels[block], channels[block+1], kernel_size, bias)
            for block in range(len(channels)-1)]

        self.dec_blocks = nn.ModuleList(self.cell_list)
    
    def forward(self, x, seq_len, start_state):

        cur_layer_input = x

        for i, block in enumerate(self.dec_blocks):
            h, c = start_state[i]
            output_inner = []

            for t in range(seq_len):
                h,c = block(cur_layer_input[:, t, :, :, :], cur_state=[h, c])
                output_inner.append(h)

            layer_output = torch.stack(output_inner, dim=1)
            cur_layer_input = layer_output
            
        return layer_output  #, [h, c]


class ConvLSTM(nn.Module):
    def __init__(self, input_channels, hidden_channels, output_channels, kernel_size, bias=True):
        super(ConvLSTM, self).__init__()
        
        encoder_channels = [input_channels]+hidden_channels
        decoder_channels = list(reversed(hidden_channels))+[output_channels]

        self.encoder = Encoder(encoder_channels, kernel_size=kernel_size, bias=bias)
        self.decoder = Decoder(decoder_channels, kernel_size=kernel_size, bias=bias)
                               
    def forward(self, x, hidden_state = None):

        batch_size, seq_len, channels, height, width = x.size()


        if hidden_state is None:
            hidden_state = self._init_hidden(batch_size=batch_size,
                                             image_size=(height, width))
            enc_hidden_state, dec_hidden_state = hidden_state

        x = self.encoder(x, seq_len, enc_hidden_state)
        x = self.decoder(x, seq_len, dec_hidden_state)
        
        return x
    
    def _init_hidden(self, batch_size, image_size):
        enc = []
        dec = []

        for i in range(len(self.encoder.cell_list)):
            enc.append(self.encoder.cell_list[i].init_hidden(batch_size, image_size))
            dec.append(self.encoder.cell_list[i].init_hidden(batch_size, image_size))
        
        init_states = enc, dec
        return init_states

In [9]:
class ConvLSTM(nn.Module):
    def __init__(self, input_channels, hidden_channels, output_channels, kernel_size, bias=True):
        super(ConvLSTM, self).__init__()

        self.layer1 = ConvLSTMCell(input_channels, hidden_channels, kernel_size, bias)
        self.layer2 = nn.Conv2d(hidden_channels, output_channels, 1)
                               
    def forward(self, x, hidden_state = None):

        batch_size, seq_len, channels, height, width = x.size()

        layer_output_list = []
        last_state_list = []

        seq_len = x.size(1)
        cur_layer_input = x

        h, c = hidden_state
        output_inner = []

        for t in range(seq_len):
            h, c = self.layer1(input_tensor=cur_layer_input[:, t, :, :, :],
                                                cur_state=[h, c])
            output_inner.append(h)

        layer_output = torch.stack(output_inner, dim=1)
        cur_layer_input = layer_output

        layer_output_list.append(layer_output)
        last_state_list.append([h, c])

        # if not self.return_all_layers:
        # layer_output_list = layer_output_list[-1:]
        # last_state_list = last_state_list[-1:]

        layer_output = self.layer2(layer_output)

        return layer_output, [h,c]
        
            
    def _init_hidden(self, batch_size, image_size):
        init_states = self.layer1.init_hidden(batch_size, image_size)

        return init_states

In [10]:
model = ConvLSTM(2, 10, 1, (3,3), True)

print(f"ConvLSTM --> num. trainable parameters:{count_parameters(model):8d}")

ConvLSTM --> num. trainable parameters:    4371


In [11]:
sim_amount = 2
use_augmented_data = False
T = 5
H = 1
training_size = 0.8
batch_size = 1
num_epochs = 200
lr = 0.0005
criterion = nn.MSELoss()
optimizer = optim.AdamW
model_name = "convlstm"

In [12]:
sims = WaterTopo.load_simulations(str(root)+"/data/normalized_data/tra_val", sim_amount=sim_amount, number_grids=64, use_augmented_data=use_augmented_data)

X, Y = create_sequence(sims, T, H)

# We keep track of indexes of train and validation.
X_tra, X_tst, Y_tra, Y_tst, ix_tra, ix_tst = train_test_split(
    X, Y, np.arange(X.shape[0]), test_size=1-training_size, shuffle=True, random_state=42)

# Split the existing test dataset into validation and test sets (50/50 split)
X_val, X_tst, Y_val, Y_tst, ix_val, ix_tst = train_test_split(
    X_tst, Y_tst, ix_tst, test_size=0.5, shuffle=True, random_state=42)

#create datasets and data loaders
train_dataset = TensorDataset(torch.tensor(X_tra, dtype=torch.float32), torch.tensor(Y_tra, dtype=torch.float32))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(Y_val, dtype=torch.float32))

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# defining the optimizer
optimizer = optimizer(model.parameters(), lr=lr)

# defining the save path
save_path = "../results/trained_models/" + model_name

# training
train_losses, val_losses, best_val_loss, time = train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, save_path, batch_size)

# Load the best model
model.load_state_dict(torch.load(save_path))

RuntimeError: Given groups=1, weight of size [1, 10, 1, 1, 1], expected input[1, 5, 10, 64, 64] to have 10 channels, but got 5 channels instead

In [None]:
model, train_losses, bast_val_loss, time = train(
        model,
        device,
        root,
        "convlstm",
        channels=2,
        T=5,
        H=1,
        sim_amount=3,
        training_size=0.8,
        batch_size=4,
        num_epochs = 200,
        lr = 0.0005,
        criterion = nn.MSELoss(),
        optimizer = optim.AdamW,
        )

RuntimeError: Given groups=1, weight of size [4, 33, 3, 3], expected input[4, 96, 64, 64] to have 33 channels, but got 96 channels instead