# ConvLSTM model

Import libraries and modules.

In [1]:
import torch
import os
# import imageio

import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import torch.nn as nn
# import torch.nn.functional as F

# from numba import jit, prange

# from PIL import Image
# from sklearn.preprocessing import MinMaxScaler
# from torchsummary import summary
# from torch.utils.data import DataLoader
# from matplotlib.colors import TwoSlopeNorm

from load_datasets import *
from ConvLSTM_pytorch import *

Check if GPU is available.

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


The following paths point to the main dataset folders (i.e., _dataset_train_val_, _dataset1_ and so on). The sub-folder for a specific type of data (_DEM_, _VX_ and so on) must be appended to these paths afterwards.

In [3]:
# Root folder of each dataset, relative to this notebook. Plain string
# literals are enough here because nothing is interpolated.
path_train = '../dataset_train_val/'
path_test1 = '../dataset1/'
path_test2 = '../dataset2/'
path_test3 = '../dataset3/'

The following lines create variables to more easily specify what we use the model for (i.e., train and validate, test with dataset 1 and so on) in the following functions.

In [4]:
# Labels identifying which dataset a helper call should operate on.
train_val, test1, test2, test3 = 'train_val', 'test1', 'test2', 'test3'

Load data.

Load the inputs and targets of the training and validation dataset.

In [5]:
# Build the training/validation inputs and targets. The shared dataset label
# is used instead of a repeated string literal so this cell stays consistent
# with the other loading cells.
inputs, targets = load_all_boys(train_val)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79


In [30]:
def encode_into_csv(inputs, targets, train_val_test):
    """
    Flatten the inputs and targets and store them as CSV files.

    Computing all inputs and targets takes a long time, so they are cached on
    disk to reduce the computation duration of later runs.

    Input:
    inputs: torch.tensor of shape: samples x 3 x 64 x 64 which represents the inputs of the network
    targets: torch.tensor of shape: samples x time steps x 2 x 64 x 64 which represents the targets of the network
    train_val_test: str, prefix used to differentiate between csv files

    Outputs:
    df_inputs: pandas.DataFrame with a single column holding the flattened inputs
    df_targets: pandas.DataFrame with a single column holding the flattened targets

    Side effects:
    Writes '<train_val_test>_in.csv' and either '<train_val_test>_tar.csv' or,
    for 'train_val', the four files '<train_val_test>_tar1.csv' ... '_tar4.csv'
    in the current working directory.
    """
    # detach() and cpu() make the conversion work for tensors that track
    # gradients or live on the GPU; for plain CPU tensors they change nothing.
    flat_inputs = torch.flatten(inputs).detach().cpu().numpy()
    flat_targets = torch.flatten(targets).detach().cpu().numpy()

    # One value per row, in flattened (row-major) order
    df_inputs = pd.DataFrame(flat_inputs)
    df_targets = pd.DataFrame(flat_targets)

    df_inputs.to_csv(train_val_test + '_in.csv', index=False)

    if train_val_test == 'train_val':
        # The train_val targets file is too big to be uploaded to GitHub as a
        # single file, so it is split into 4 separate .csv files.
        # numel() counts the elements for any tensor rank.
        n_parts = 4
        rows_per_part = targets.numel() // n_parts
        for part in range(n_parts):
            start = part * rows_per_part
            # The last part also takes the remainder of the division
            stop = start + rows_per_part if part < n_parts - 1 else None
            df_targets[start:stop].to_csv(
                f'{train_val_test}_tar{part + 1}.csv', index=False
            )
    else:
        df_targets.to_csv(train_val_test + '_tar.csv', index=False)
    return df_inputs, df_targets

In [31]:
def decode_from_csv(train_val_test):
    """
    Restore the inputs and targets of a dataset from its CSV files.

    Due to the long run time of computing all inputs and targets, a csv file will be opened
    at the start of every notebook which represents the inputs and targets for a certain dataset.

    Input:
    train_val_test: str, identifies which dataset is being retrieved. It is the prefix of
            '<train_val_test>_in.csv' and of the targets file(s): '_tar1.csv' ... '_tar4.csv'
            for 'train_val', '_tar.csv' otherwise.

    Output:
    inputs: torch.Tensor which contains DEM, slope x and y for all files in a dataset
            Shape is samples x 3 x 64 x 64
    targets: torch.Tensor which contains water depth and discharge for all files in a dataset.
            Shape is samples x time steps x 2 x 64 x 64

    Raises:
    RuntimeError (from torch.reshape) when the number of stored values does not
    match the shapes above.
    """
    df_inputs = pd.read_csv(train_val_test + '_in.csv')

    # The train_val targets are too big to be uploaded to GitHub as one file,
    # so they are stored in 4 separate .csv files that are stitched together here.
    if train_val_test == 'train_val':
        target_parts = [
            pd.read_csv(f'{train_val_test}_tar{part}.csv') for part in range(1, 5)
        ]
        df_targets = pd.concat(target_parts, axis=0)
    else:
        df_targets = pd.read_csv(train_val_test + '_tar.csv')

    # Convert the DataFrames to PyTorch tensors
    restored_inputs = torch.tensor(df_inputs.values)
    restored_targets = torch.tensor(df_targets.values)

    # Derive the number of samples from the stored data instead of a lookup on
    # the dataset name. The previous `if 'train_val':` chain tested non-empty
    # string literals, so it always selected 80 samples and every other dataset
    # failed to reshape.
    values_per_input = 3 * 64 * 64
    samples = restored_inputs.numel() // values_per_input

    shape_inputs = (samples, 3, 64, 64)
    shape_targets = (samples, 97, 2, 64, 64)

    # Restore the original layouts of the flattened tensors
    inputs = torch.reshape(restored_inputs, shape_inputs)
    targets = torch.reshape(restored_targets, shape_targets)

    # Print the shapes of the restored tensors
    print("Restored inputs Shape:", inputs.shape)
    print("Restored targets Shape:", targets.shape)
    return inputs, targets

In [32]:
# Cache the training/validation data as CSV files. The returned values are
# the flattened pandas DataFrames, not tensors.
inps, targs = encode_into_csv(inputs, targets, train_val)

In [33]:
# Reload the training/validation inputs and targets as tensors from the CSV files.
inputs, targets = decode_from_csv(train_val)

Restored inputs Shape: torch.Size([80, 3, 64, 64])
Restored targets Shape: torch.Size([80, 97, 2, 64, 64])


In [34]:
# not needed anymore?

# count = 0
# dir_path = path_train + 'DEM/' # Arbitrary choice as DEM, vx, vy and WD all have the same number of samples
# for path in os.listdir(dir_path):
#     if os.path.isfile(os.path.join(dir_path, path)):
#         count += 1
# inputs = torch.zeros((count, 3, 64, 64))
# targets = torch.zeros((count, 97, 2, 64, 64))
# print(count)

80


Test dataset 1.

In [None]:
# Build the inputs and targets of test dataset 1. load_all_boys is not defined
# in this notebook; presumably it comes from the load_datasets star import.
inps1, targs1 = load_all_boys(test1)

In [None]:
# Write test dataset 1 to CSV. This rebinds inps1/targs1 to the returned
# DataFrames.
inps1, targs1 = encode_into_csv(inps1, targs1, test1)

In [None]:
# Read test dataset 1 back from CSV as tensors.
inps1, targs1 = decode_from_csv(test1)

Test dataset 2.

In [None]:
# Build the inputs and targets of test dataset 2. load_all_boys is not defined
# in this notebook; presumably it comes from the load_datasets star import.
inps2, targs2 = load_all_boys(test2)

In [None]:
# Write test dataset 2 to CSV. This rebinds inps2/targs2 to the returned
# DataFrames.
inps2, targs2 = encode_into_csv(inps2, targs2, test2)

In [None]:
# Read test dataset 2 back from CSV as tensors. The result is stored in the
# dataset-2 variables; assigning it to inps1/targs1 would overwrite test
# dataset 1 and leave inps2/targs2 holding DataFrames.
inps2, targs2 = decode_from_csv(test2)

Test dataset 3.

In [None]:
# Build the inputs and targets of test dataset 3. load_all_boys is not defined
# in this notebook; presumably it comes from the load_datasets star import.
inps3, targs3 = load_all_boys(test3)

In [None]:
# Write test dataset 3 to CSV. This rebinds inps3/targs3 to the returned
# DataFrames.
inps3, targs3 = encode_into_csv(inps3, targs3, test3)

In [None]:
# Read test dataset 3 back from CSV as tensors.
inps3, targs3 = decode_from_csv(test3)