# ConvLSTM model

Import libraries and modules.

In [1]:
import torch
import os
# import imageio

import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import torch.nn as nn
# import torch.nn.functional as F

# from numba import jit, prange

# from PIL import Image
# from sklearn.preprocessing import MinMaxScaler
# from torchsummary import summary
# from torch.utils.data import DataLoader
# from matplotlib.colors import TwoSlopeNorm

from load_datasets import *
from ConvLSTM_pytorch import *

Check if GPU is available.

In [2]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


The following paths point to the root folder of each dataset (i.e., _dataset_train_val_, _dataset1_ and so on). The subfolder for a specific type of data (_DEM_, _VX_, _VY_ or _WD_) is appended later, inside the loading functions.

In [3]:
# Root folder of each dataset, relative to the working directory.
# Plain string literals: there is nothing to interpolate.
path_train = '../dataset_train_val/'
path_test1 = '../dataset1/'
path_test2 = '../dataset2/'
path_test3 = '../dataset3/'

The following variables name the four datasets (train/validation and the three test sets). They are passed as the `train_val_test` key to the functions below to select which dataset is used.

In [4]:
# Keys accepted by the `train_val_test` argument of the loading functions.
train_val, test1, test2, test3 = (
    'train_val',
    'test1',
    'test2',
    'test3',
)

Load data.

In [None]:
# inputs, targets = load_all_boys('train_val')

In [13]:
def encode_into_csv(inputs, targets, train_val_test):
    """
    Cache the network inputs and targets as CSV files.

    Computing the tensors from the raw text files is slow, so they are
    flattened into a single column and written to disk for later reuse.

    Input:
    inputs: torch.Tensor with the network inputs
            (samples x 3 x 64 x 64 when produced by load_all_boys)
    targets: torch.Tensor with the network targets
            (samples x time steps x 2 x 64 x 64 when produced by load_all_boys)
    train_val_test: str, prefix used for the names of the generated files

    Output:
    df_inputs: single-column pandas.DataFrame with the flattened inputs
    df_targets: single-column pandas.DataFrame with the flattened targets

    Files written to the current working directory:
    '<train_val_test>_in.csv' for the inputs and '<train_val_test>_tar.csv'
    for the targets. When train_val_test == 'train_val' the targets are
    split over '<train_val_test>_tar1.csv' ... '<train_val_test>_tar4.csv'.
    """
    # detach()/cpu() make the conversion to NumPy work for tensors that
    # require grad or live on the GPU as well.
    flat_inputs = torch.flatten(inputs.detach().cpu())
    flat_targets = torch.flatten(targets.detach().cpu())

    # One value per row, a single column named 0
    df_inputs = pd.DataFrame(flat_inputs.numpy())
    df_targets = pd.DataFrame(flat_targets.numpy())

    df_inputs.to_csv(train_val_test + '_in.csv', index=False)

    if train_val_test == 'train_val':
        # The train/validation targets file is too big to be uploaded to
        # GitHub, so it is split into 4 files of (roughly) equal length.
        # numel() counts the elements for any number of dimensions.
        n = targets.numel() // 4
        df_targets[:n].to_csv(train_val_test + '_tar1.csv', index=False)
        df_targets[n:2*n].to_csv(train_val_test + '_tar2.csv', index=False)
        df_targets[2*n:3*n].to_csv(train_val_test + '_tar3.csv', index=False)
        # The last part also takes the remainder when the total is not a multiple of 4
        df_targets[3*n:].to_csv(train_val_test + '_tar4.csv', index=False)
    else:
        df_targets.to_csv(train_val_test + '_tar.csv', index=False)
    return df_inputs, df_targets

In [14]:
def decode_from_csv(train_val_test, time_steps=97, grid_size=64):
    """
    Restore the inputs and targets of a dataset from its cached CSV files.

    Computing all inputs and targets from the raw files is slow, so they are
    read back from the single-column CSV files named after train_val_test
    ('<key>_in.csv' and '<key>_tar.csv', or '<key>_tar1.csv' ...
    '<key>_tar4.csv' for 'train_val', whose targets are stored in 4 parts).

    Input:
    train_val_test: str, identifies which dataset is being retrieved
    time_steps: int, number of time steps stored per sample (default 97)
    grid_size: int, side length of the square grid (default 64)

    Output:
    inputs: torch.Tensor which contains DEM, slope x and y for all files in a dataset
            Shape is samples x 3 x grid_size x grid_size
    targets: torch.Tensor which contains water depth and discharge for all files in a dataset.
            Shape is samples x time_steps x 2 x grid_size x grid_size

    Raises:
    ValueError: if inputs and targets do not hold the same number of samples
    RuntimeError: (from torch.reshape) if the number of stored values does
                  not fit the requested shape
    """
    df_inputs = pd.read_csv(train_val_test + '_in.csv')

    # The 'train_val' targets file is too big to be uploaded to GitHub,
    # so it is stored as 4 separate .csv files that are stitched together here.
    if train_val_test == 'train_val':
        target_parts = [pd.read_csv(f'{train_val_test}_tar{part}.csv')
                        for part in range(1, 5)]
        df_targets = pd.concat(target_parts, axis=0)
    else:
        df_targets = pd.read_csv(train_val_test + '_tar.csv')

    # Convert the DataFrames to PyTorch tensors
    restored_inputs = torch.tensor(df_inputs.values)
    restored_targets = torch.tensor(df_targets.values)

    # -1 lets torch infer the number of samples from the amount of data, so
    # no per-dataset sample count has to be kept in sync with the files.
    inputs = torch.reshape(restored_inputs, (-1, 3, grid_size, grid_size))
    targets = torch.reshape(restored_targets,
                            (-1, time_steps, 2, grid_size, grid_size))

    if inputs.shape[0] != targets.shape[0]:
        raise ValueError(
            f"Inputs hold {inputs.shape[0]} samples but targets hold "
            f"{targets.shape[0]} for dataset '{train_val_test}'."
        )

    # Print the shapes of the restored tensors
    print("Restored inputs Shape:", inputs.shape)
    print("Restored targets Shape:", targets.shape)
    return inputs, targets

In [None]:
# inps, targs = encode_into_csv(inputs, targets, train_val)

In [None]:
# inputs, targets = decode_from_csv(train_val)

In [None]:
# not needed anymore?

# count = 0
# dir_path = path_train + 'DEM/' # Arbitrary choice as DEM, vx, vy and WD all have the same number of samples
# for path in os.listdir(dir_path):
#     if os.path.isfile(os.path.join(dir_path, path)):
#         count += 1
# inputs = torch.zeros((count, 3, 64, 64))
# targets = torch.zeros((count, 97, 2, 64, 64))
# print(count)

Test dataset 1.

In [20]:
def process_elevation_data(file_id, train_val_test='train_val'):
    """
    Processes elevation data from a DEM file.

    Input:
    file_id (int): Number of the DEM file to be processed. For the test
                   datasets a dataset-specific offset is added to it
                   (500, 10000 and 15001 respectively) to build the file name.
    train_val_test: key for specifying what we are using the model for
                   'train_val' = train and validate the model
                   'test1' = test the model with dataset 1
                   'test2' = test the model with dataset 2
                   'test3' = test the model with dataset 3

    Output:
    torch.Tensor: A 3 x 64 x 64 tensor stacking the elevation grid and its
                  two gradients (named slope x and slope y).

    Raises:
    ValueError: if train_val_test is not one of the four known keys.
    """
    # Build the path of the DEM file for the requested dataset
    if train_val_test == 'train_val':
        file_path = path_train + f'DEM/DEM_{file_id}.txt'
    elif train_val_test == 'test1':
        file_path = path_test1 + f'DEM/DEM_{500 + file_id}.txt'
    elif train_val_test == 'test2':
        file_path = path_test2 + f'DEM/DEM_{10000 + file_id}.txt'
    elif train_val_test == 'test3':
        file_path = path_test3 + f'DEM/DEM_{15001 + file_id}.txt'
    else:
        # Fail with a clear message instead of an unbound `file_path` below
        raise ValueError(
            f"Unknown dataset key {train_val_test!r}; expected 'train_val', "
            "'test1', 'test2' or 'test3'."
        )

    # Load the elevation data from the file
    elevation_data = np.loadtxt(file_path)

    # The third column holds the elevation; reshape it into a 64x64 grid
    elevation_grid = elevation_data[:, 2].reshape(64, 64)

    # Convert the elevation grid to a PyTorch tensor
    elevation_tensor = torch.tensor(elevation_grid)

    # torch.gradient returns one tensor per dimension: the first is the
    # gradient along dim 0 (rows), the second along dim 1 (columns).
    # NOTE(review): whether rows correspond to the x direction depends on
    # how the DEM file is ordered - confirm the x/y naming.
    slope_x, slope_y = torch.gradient(elevation_tensor)

    # Combine the elevation tensor with the slope tensors
    elevation_slope_tensor = torch.stack((elevation_tensor, slope_x, slope_y), dim=0)

    return elevation_slope_tensor

# ------------- #

def process_water_depth(file_id, train_val_test='train_val', time_step=0):
    """
    Processes water depth data from a specific time step in a file.

    Every line of the WD file is read as one time step holding 64*64
    whitespace-separated values.

    Args:
    file_id (int): Number of the water depth file to be processed. For the
                   test datasets a dataset-specific offset is added to it
                   (500, 10000 and 15001 respectively) to build the file name.
    train_val_test: key for specifying what we are using the model for
                   'train_val' = train and validate the model
                   'test1' = test the model with dataset 1
                   'test2' = test the model with dataset 2
                   'test3' = test the model with dataset 3
    time_step (int): Index of the line (time step) to extract from the file.
                     Default is the first time step.

    Returns:
    torch.Tensor: A 64x64 tensor representing water depth at the given time step.

    Raises:
    ValueError: if train_val_test is not one of the four known keys, if a
                value cannot be converted to float, or if the selected line
                does not hold exactly 64*64 values.
    IndexError: if time_step is out of range for the file.
    """
    # Build the path of the WD file for the requested dataset
    if train_val_test == 'train_val':
        file_path = path_train + f'WD/WD_{file_id}.txt'
    elif train_val_test == 'test1':
        file_path = path_test1 + f'WD/WD_{500 + file_id}.txt'
    elif train_val_test == 'test2':
        file_path = path_test2 + f'WD/WD_{10000 + file_id}.txt'
    elif train_val_test == 'test3':
        file_path = path_test3 + f'WD/WD_{15001 + file_id}.txt'
    else:
        # Fail with a clear message instead of an unbound `file_path` below
        raise ValueError(
            f"Unknown dataset key {train_val_test!r}; expected 'train_val', "
            "'test1', 'test2' or 'test3'."
        )

    # Read the file
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Keep the try body minimal: only the row lookup can raise IndexError
    try:
        selected_row = lines[time_step].split()
    except IndexError:
        raise IndexError(f"Time step {time_step} is out of range for the file {file_path}.") from None

    # Convert string elements to floats
    depth_values = [float(val) for val in selected_row]

    # Validate the data before reshaping it into a 64x64 tensor
    if len(depth_values) != 64 * 64:
        raise ValueError(f"The number of elements in {file_path} at time step {time_step} doesn't match a 64x64 matrix.")

    return torch.tensor(depth_values).view(64, 64)

# ------------- #

def process_velocities(file_id, train_val_test='train_val', time_step=0):
    """
    Processes velocity data (x and y components) from a specific time step.

    Every line of the VX and VY files is read as one time step holding 64*64
    whitespace-separated values.

    Input:
    file_id (int): Number of the VX/VY files to be processed. For the test
                   datasets a dataset-specific offset is added to it
                   (500, 10000 and 15001 respectively) to build the file names.
    train_val_test: key for specifying what we are using the model for
                   'train_val' = train and validate the model
                   'test1' = test the model with dataset 1
                   'test2' = test the model with dataset 2
                   'test3' = test the model with dataset 3
    time_step (int): Index of the line (time step) to extract from the files.
                     Default is the first time step.

    Output:
    (torch.Tensor, torch.Tensor): two 64x64 tensors, the values read from
                   the VX file and from the VY file at the given time step.

    Raises:
    ValueError: if train_val_test is not one of the four known keys, if a
                value cannot be converted to float, or if a selected line
                does not hold exactly 64*64 values.
    IndexError: if time_step is out of range for either file.
    """
    # Build the paths of the VX and VY files for the requested dataset
    if train_val_test == 'train_val':
        file_path_x = path_train + f'VX/VX_{file_id}.txt'
        file_path_y = path_train + f'VY/VY_{file_id}.txt'

    elif train_val_test == 'test1':
        file_path_x = path_test1 + f'VX/VX_{500 + file_id}.txt'
        file_path_y = path_test1 + f'VY/VY_{500 + file_id}.txt'

    elif train_val_test == 'test2':
        file_path_x = path_test2 + f'VX/VX_{10000 + file_id}.txt'
        file_path_y = path_test2 + f'VY/VY_{10000 + file_id}.txt'

    elif train_val_test == 'test3':
        file_path_x = path_test3 + f'VX/VX_{15001 + file_id}.txt'
        file_path_y = path_test3 + f'VY/VY_{15001 + file_id}.txt'

    else:
        # Fail with a clear message instead of unbound paths below
        raise ValueError(
            f"Unknown dataset key {train_val_test!r}; expected 'train_val', "
            "'test1', 'test2' or 'test3'."
        )

    # Read both files line by line; only the requested line is parsed, so the
    # files are not additionally parsed in full with np.loadtxt.
    with open(file_path_x, 'r') as file:
        lines_x = file.readlines()

    with open(file_path_y, 'r') as file:
        lines_y = file.readlines()

    # Keep the try body minimal: only the row lookups can raise IndexError
    try:
        selected_row_x = lines_x[time_step].split()
        selected_row_y = lines_y[time_step].split()
    except IndexError:
        raise IndexError(f"Time step {time_step} is out of range for the file {file_path_x} or {file_path_y}.") from None

    # Convert string elements to floats
    vel_x = [float(val) for val in selected_row_x]
    vel_y = [float(val) for val in selected_row_y]

    # Validate the data before reshaping it into 64x64 tensors
    if len(vel_x) != 64 * 64 or len(vel_y) != 64 * 64:
        raise ValueError(f"The number of elements in {file_path_x} or {file_path_y} at time step {time_step} doesn't match a 64x64 matrix.")

    return torch.tensor(vel_x).view(64, 64), torch.tensor(vel_y).view(64, 64)

# ------------- #

def compute_targets(file_id, train_val_test='train_val', time_step = 0):
    """
    Build the target pair (water depth, discharge) for one file and time step.

    The discharge is the water depth multiplied by the magnitude of the
    velocity vector returned by process_velocities.

    Input:
    file_id: number of the simulation files, forwarded to process_water_depth
             and process_velocities
    train_val_test: key for specifying what we are using the model for
                   'train_val' = train and validate the model
                   'test1' = test the model with dataset 1
                   'test2' = test the model with dataset 2
                   'test3' = test the model with dataset 3
    time_step: time step forwarded to both helper functions (default 0)

    Output:
    targets: A torch.tensor which is 2 x 64 x 64; index 0 is the water depth
             and index 1 the discharge
    """
    depth = process_water_depth(file_id, train_val_test, time_step)
    vel_x, vel_y = process_velocities(file_id, train_val_test, time_step)

    # Velocity magnitude, then discharge per meter width
    speed = torch.sqrt(vel_x.pow(2) + vel_y.pow(2))
    unit_discharge = depth * speed

    return torch.stack([depth, unit_discharge], dim=0)

# ------------- #

# @jit(parallel=True)
def load_all_boys(train_val_test='train_val', time=97):
    '''
    Load all "file_id" and "time_step" for chosen dataset

    The number of samples is the number of files in the dataset's DEM folder
    (DEM, VX, VY and WD are expected to hold the same number of samples).
    Files are then requested with file ids 1 ... count.

    NOTE(review): this assumes the files of a dataset are numbered
    contiguously (after the per-dataset offset applied by the process_*
    functions). The recorded run for 'test1' stopped with a
    FileNotFoundError on DEM_520.txt, so this assumption apparently does not
    hold for dataset 1 - verify the file numbering.

    Input:
    train_val_test = key for choosing dataset
         = 'train_val', 'test1', 'test2', 'test3'
    time = number of time steps to load per sample (default 97)

    Output:
    inputs: torch.Tensor which contains DEM, slope x and y for all files in a dataset
            Shape is samples x 3 x 64 x 64
    targets: torch.Tensor which contains water depth and discharge for all files in a dataset.
             Shape is samples x time steps x 2 x 64 x 64

    Raises:
    ValueError: if train_val_test is not one of the four known keys.
    '''
    if train_val_test == 'train_val':
        file_path = path_train
    elif train_val_test == 'test1':
        file_path = path_test1
    elif train_val_test == 'test2':
        file_path = path_test2
    elif train_val_test == 'test3':
        file_path = path_test3
    else:
        # Fail with a clear message instead of an unbound `file_path` below
        raise ValueError(
            f"Unknown dataset key {train_val_test!r}; expected 'train_val', "
            "'test1', 'test2' or 'test3'."
        )

    # Count the regular files in the DEM folder to get the number of samples
    dir_path = file_path + 'DEM'
    count = sum(
        1 for entry in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, entry))
    )
    inputs = torch.zeros((count, 3, 64, 64))
    targets = torch.zeros((count, time, 2, 64, 64))

    for i in range(count):
        print(i)  # progress indicator, loading is slow
        # File ids are 1-based
        inputs[i] = process_elevation_data(i + 1, train_val_test)
        for t in range(time):
            targets[i, t] = compute_targets(i + 1, train_val_test, time_step = t)
    return inputs, targets

In [12]:
# Build the dataset-1 inputs and targets from the raw text files.
inps1, targs1 = load_all_boys('test1')

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


FileNotFoundError: ../dataset1/DEM/DEM_520.txt not found.

In [16]:
# Cache the dataset-1 tensors as CSV files. Needs inps1/targs1 from a
# successful load_all_boys run; the names are rebound to the returned DataFrames.
inps1, targs1 = encode_into_csv(inps1, targs1, test1)

NameError: name 'inps1' is not defined

In [None]:
# Restore the dataset-1 tensors from the cached CSV files.
inps1, targs1 = decode_from_csv(test1)

Test dataset 2.

In [None]:
# Build the dataset-2 inputs and targets from the raw text files.
inps2, targs2 = load_all_boys(test2)

In [None]:
# Cache the dataset-2 tensors as CSV files; the names are rebound to the
# returned DataFrames.
inps2, targs2 = encode_into_csv(inps2, targs2, test2)

In [None]:
# Restore the dataset-2 tensors from the cached CSV files. They are bound to
# the dataset-2 names so the dataset-1 tensors are not overwritten.
inps2, targs2 = decode_from_csv(test2)

Test dataset 3.

In [22]:
# doesn't work, dataset 3 contains 128x128 grid data - need to change function
# inps3, targs3 = load_all_boys(test3)

In [23]:
# inps3, targs3 = encode_into_csv(inps3, targs3, test3)

In [24]:
# inps3, targs3 = decode_from_csv(test3)