In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import os
import glob
from urllib.request import urlretrieve
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Imported because Roberto also did it.
# from torch_geometric.data import Data

# Load the data

In [2]:
# Load the DEM, water depth, x-velocity and y-velocity data.
path = "data/raw_datasets/"


def list_files_sorted(pattern):
    """Return the files matching ``pattern`` in a deterministic, natural order.

    ``glob.glob`` returns matches in arbitrary (filesystem-dependent) order, so
    the i-th entry of two separately globbed folders is not guaranteed to be
    the same simulation. Files are therefore sorted by the integer formed from
    the digits in their base name, with the base name itself as tie-breaker
    (and as the only criterion when the name contains no digits).

    NOTE(review): this keeps the DEM/WD/VX/VY lists aligned only if the files
    of one simulation carry the same number in every folder -- confirm against
    the dataset naming scheme.
    """
    def natural_key(file_name):
        base = os.path.basename(file_name)
        digits = "".join(ch for ch in base if ch in "0123456789")
        return (int(digits) if digits else -1, base)

    return sorted(glob.glob(pattern), key=natural_key)


def load_txt_files(files):
    """Load every file in ``files`` with ``np.loadtxt`` and return the arrays as a list."""
    return [np.loadtxt(file_name) for file_name in files]


# DEM files
dem_tv = list_files_sorted(path + 'DEM_tra_val/*.txt')
dem_t1 = list_files_sorted(path + 'DEM_test1/*.txt')
dem_t2 = list_files_sorted(path + 'DEM_test2/*.txt')
dem_t3 = list_files_sorted(path + 'DEM_test3/*.txt')

# Water depth files
wd_tv = list_files_sorted(path + 'WD_tra_val/*.txt')
wd_t1 = list_files_sorted(path + 'WD_test1/*.txt')
wd_t2 = list_files_sorted(path + 'WD_test2/*.txt')
wd_t3 = list_files_sorted(path + 'WD_test3/*.txt')

# Velocity x files
vx_tv = list_files_sorted(path + 'VX_tra_val/*.txt')
vx_t1 = list_files_sorted(path + 'VX_test1/*.txt')
vx_t2 = list_files_sorted(path + 'VX_test2/*.txt')
vx_t3 = list_files_sorted(path + 'VX_test3/*.txt')

# Velocity y files
# NOTE(review): these folder names are lowercase while the others are uppercase;
# on a case-sensitive filesystem they must match the real directory names exactly.
vy_tv = list_files_sorted(path + 'vy_tra_val/*.txt')
vy_t1 = list_files_sorted(path + 'vy_test1/*.txt')
vy_t2 = list_files_sorted(path + 'vy_test2/*.txt')
vy_t3 = list_files_sorted(path + 'vy_test3/*.txt')

# Every split must provide the same number of files for all four variables;
# otherwise simulations cannot be paired by position.
for split_name, dem_files, wd_files, vx_files, vy_files in (
    ("tra_val", dem_tv, wd_tv, vx_tv, vy_tv),
    ("test1", dem_t1, wd_t1, vx_t1, vy_t1),
    ("test2", dem_t2, wd_t2, vx_t2, vy_t2),
    ("test3", dem_t3, wd_t3, vx_t3, vy_t3),
):
    file_counts = (len(dem_files), len(wd_files), len(vx_files), len(vy_files))
    if len(set(file_counts)) != 1:
        raise ValueError(
            f"Number of files differs between variables for split '{split_name}' "
            f"(DEM, WD, VX, VY) = {file_counts}"
        )

# Each entry of the lists below is the array loaded from one file.
dem_tra_val = load_txt_files(dem_tv)
dem_tst1 = load_txt_files(dem_t1)
dem_tst2 = load_txt_files(dem_t2)
dem_tst3 = load_txt_files(dem_t3)

wd_tra_val = load_txt_files(wd_tv)
wd_tst1 = load_txt_files(wd_t1)
wd_tst2 = load_txt_files(wd_t2)
wd_tst3 = load_txt_files(wd_t3)

vx_tra_val = load_txt_files(vx_tv)
vx_tst1 = load_txt_files(vx_t1)
vx_tst2 = load_txt_files(vx_t2)
vx_tst3 = load_txt_files(vx_t3)

vy_tra_val = load_txt_files(vy_tv)
vy_tst1 = load_txt_files(vy_t1)
vy_tst2 = load_txt_files(vy_t2)
vy_tst3 = load_txt_files(vy_t3)

In [3]:
# # Extract DEM data at specific simulation
# dem_sim = np.reshape(dem[sim][:,2],(number_grids,number_grids))

# # Extract the water depth, vx, and vy value at specific time
# wd_sim = np.reshape(wd[sim][t],(number_grids,number_grids))
# vx_sim = np.reshape(vx[sim][t],(number_grids,number_grids))
# vy_sim = np.reshape(vy[sim][t],(number_grids,number_grids))

# Split the training and validation datasets 

In [4]:
# Split the training/validation simulations (80 in total) into training (70%)
# and validation (30%). The test datasets are already provided separately.

# All three variables go through ONE train_test_split call, so they are
# guaranteed to share the same simulation split (and a length mismatch between
# them raises instead of silently producing misaligned splits).
sim_indices = np.arange(len(wd_tra_val))
(wd_tra, wd_val,
 vx_tra, vx_val,
 vy_tra, vy_val,
 iwd_tra, iwd_val) = train_test_split(
    wd_tra_val, vx_tra_val, vy_tra_val, sim_indices,
    train_size=0.7, shuffle=True, random_state=42)

# The per-variable index arrays are kept for existing users of these names;
# they are identical because the split is shared.
ivx_tra, ivx_val = iwd_tra.copy(), iwd_val.copy()
ivy_tra, ivy_val = iwd_tra.copy(), iwd_val.copy()


# Each print now measures the list its label names.
print(f"len(vx_tra): {len(vx_tra)}")
print(f"len(vx_val): {len(vx_val)}")
print(f"len(vx_tst1): {len(vx_tst1)}")
print(f"len(vx_tst2): {len(vx_tst2)}")
print(f"len(vx_tst3): {len(vx_tst3)}")

# Note that all training, validation and test datasets have the form
# "a list containing multiple arrays".

len(vx_tra): 56
len(vx_val): 24
len(vx_tst1): 20
len(vx_tst2): 21
len(vx_tst3): 10


In [5]:
# Stack all simulations of each split along axis 0 for normalization.
# Note that each of the simulations contains t = 0 - 96.
# One np.concatenate call per list replaces the pairwise concatenation in a
# loop, which re-copied the growing array on every iteration.
wd_tra_arr = np.concatenate(wd_tra, axis=0)
wd_val_arr = np.concatenate(wd_val, axis=0)
wd_tst1_arr = np.concatenate(wd_tst1, axis=0)
wd_tst2_arr = np.concatenate(wd_tst2, axis=0)
wd_tst3_arr = np.concatenate(wd_tst3, axis=0)

vx_tra_arr = np.concatenate(vx_tra, axis=0)
vx_val_arr = np.concatenate(vx_val, axis=0)
vx_tst1_arr = np.concatenate(vx_tst1, axis=0)
vx_tst2_arr = np.concatenate(vx_tst2, axis=0)
vx_tst3_arr = np.concatenate(vx_tst3, axis=0)

vy_tra_arr = np.concatenate(vy_tra, axis=0)
vy_val_arr = np.concatenate(vy_val, axis=0)
vy_tst1_arr = np.concatenate(vy_tst1, axis=0)
vy_tst2_arr = np.concatenate(vy_tst2, axis=0)
vy_tst3_arr = np.concatenate(vy_tst3, axis=0)

# Data Normalization

In [6]:
# Min-max normalization is used here to normalize each variable (water depth and velocities) over the entire simulation sequence (0-130)
def scale_sequences(X,scaler=None,scaler_type='minmax'):
    """
    Scale an array with a scikit-learn scaler, creating and fitting one if none is passed.
    Adapted from exercise notebook on drinking water demand.

    X is flattened to a single column before scaling, so one set of scaling
    statistics is shared by every value of X; the result is reshaped back to
    the original shape of X.

    Parameters
    ----------
    X : array with ``shape`` and ``reshape`` (e.g. ``np.ndarray``)
        Values to scale.
    scaler : object with a ``transform`` method, or None
        Already fitted scaler to apply. When None, a new scaler is fitted on X.
    scaler_type : {'minmax', 'standard'}
        Kind of scaler to create when ``scaler`` is None; ignored otherwise.

    Returns
    -------
    The scaled array when ``scaler`` was passed, otherwise the tuple
    ``(scaled array, fitted scaler)``.

    Raises
    ------
    ValueError
        If ``scaler`` is None and ``scaler_type`` is not supported.
    """
    original_shape = X.shape
    column = X.reshape(-1, 1)

    # Compare with None explicitly: truthiness would misroute a scaler object
    # that happens to evaluate as False.
    if scaler is not None:
        return scaler.transform(column).reshape(original_shape)

    if scaler_type == 'standard':
        scaler = StandardScaler()
    elif scaler_type == 'minmax':
        scaler = MinMaxScaler()
    else:
        # ValueError is a subclass of Exception, so existing handlers still catch it.
        raise ValueError("Type of scikit-learn scaler not supported. Choose 'standard' or 'minmax'.")
    return scaler.fit_transform(column).reshape(original_shape), scaler
    
def denormalize(image_tensor, mean, std):
    """Undo a per-channel normalization and clip the result to [0, 1].

    ``mean`` and ``std`` are indexed with ``[:, None, None]`` so that they
    broadcast against ``image_tensor``; the returned value is
    ``image_tensor * std + mean`` limited to the range 0..1.
    """
    # Rescale first, then shift back by the mean.
    rescaled = image_tensor * std[:, None, None]
    restored = rescaled + mean[:, None, None]
    # Keep every value inside [0, 1].
    return restored.clip(0, 1)


In [7]:
# Normalization of water depth and velocity.
# The scaler of each variable is fitted on the TRAINING data only and then
# reused for the validation and test splits. Fitting a separate scaler per
# split would map every split to its own value range, so the same normalized
# value would stand for different physical values in different splits.
# Validation/test values may therefore fall slightly outside [0, 1].
wd_tra_scale, wd_tra_scaler = scale_sequences(wd_tra_arr, scaler=None, scaler_type='minmax')
wd_val_scale = scale_sequences(wd_val_arr, scaler=wd_tra_scaler)
wd_tst1_scale = scale_sequences(wd_tst1_arr, scaler=wd_tra_scaler)
wd_tst2_scale = scale_sequences(wd_tst2_arr, scaler=wd_tra_scaler)
wd_tst3_scale = scale_sequences(wd_tst3_arr, scaler=wd_tra_scaler)

vx_tra_scale, vx_tra_scaler = scale_sequences(vx_tra_arr, scaler=None, scaler_type='minmax')
vx_val_scale = scale_sequences(vx_val_arr, scaler=vx_tra_scaler)
vx_tst1_scale = scale_sequences(vx_tst1_arr, scaler=vx_tra_scaler)
vx_tst2_scale = scale_sequences(vx_tst2_arr, scaler=vx_tra_scaler)
vx_tst3_scale = scale_sequences(vx_tst3_arr, scaler=vx_tra_scaler)

vy_tra_scale, vy_tra_scaler = scale_sequences(vy_tra_arr, scaler=None, scaler_type='minmax')
vy_val_scale = scale_sequences(vy_val_arr, scaler=vy_tra_scaler)
vy_tst1_scale = scale_sequences(vy_tst1_arr, scaler=vy_tra_scaler)
vy_tst2_scale = scale_sequences(vy_tst2_arr, scaler=vy_tra_scaler)
vy_tst3_scale = scale_sequences(vy_tst3_arr, scaler=vy_tra_scaler)

# The per-split scaler names are kept for existing users; they all refer to the
# scaler fitted on the training data of the respective variable.
wd_val_scaler = wd_tst1_scaler = wd_tst2_scaler = wd_tst3_scaler = wd_tra_scaler
vx_val_scaler = vx_tst1_scaler = vx_tst2_scaler = vx_tst3_scaler = vx_tra_scaler
vy_val_scaler = vy_tst1_scaler = vy_tst2_scaler = vy_tst3_scaler = vy_tra_scaler


In [8]:
# Once all the data is normalized, cut every stacked array back into its
# original form: one equally sized block per simulation
# (131 simulations in total, each with t = 0-96).
wd_tra_norm, wd_val_norm, wd_tst1_norm, wd_tst2_norm, wd_tst3_norm = (
    np.vsplit(stacked, len(simulations))
    for stacked, simulations in (
        (wd_tra_scale, wd_tra),
        (wd_val_scale, wd_val),
        (wd_tst1_scale, wd_tst1),
        (wd_tst2_scale, wd_tst2),
        (wd_tst3_scale, wd_tst3),
    )
)

vx_tra_norm, vx_val_norm, vx_tst1_norm, vx_tst2_norm, vx_tst3_norm = (
    np.vsplit(stacked, len(simulations))
    for stacked, simulations in (
        (vx_tra_scale, vx_tra),
        (vx_val_scale, vx_val),
        (vx_tst1_scale, vx_tst1),
        (vx_tst2_scale, vx_tst2),
        (vx_tst3_scale, vx_tst3),
    )
)

vy_tra_norm, vy_val_norm, vy_tst1_norm, vy_tst2_norm, vy_tst3_norm = (
    np.vsplit(stacked, len(simulations))
    for stacked, simulations in (
        (vy_tra_scale, vy_tra),
        (vy_val_scale, vy_val),
        (vy_tst1_scale, vy_tst1),
        (vy_tst2_scale, vy_tst2),
        (vy_tst3_scale, vy_tst3),
    )
)

In [9]:
# Convert every list of per-simulation arrays into a single np.array
# (the simulations become the first axis).

wd_tra_norm = np.array(wd_tra_norm)
wd_val_norm = np.array(wd_val_norm)
wd_tst1_norm = np.array(wd_tst1_norm)
wd_tst2_norm = np.array(wd_tst2_norm)
wd_tst3_norm = np.array(wd_tst3_norm)

vx_tra_norm = np.array(vx_tra_norm)
vx_val_norm = np.array(vx_val_norm)
vx_tst1_norm = np.array(vx_tst1_norm)
vx_tst2_norm = np.array(vx_tst2_norm)
vx_tst3_norm = np.array(vx_tst3_norm)

# Must be built from the y-velocity data; building it from wd_tra_norm would
# overwrite the y-velocity training set with water depth.
vy_tra_norm = np.array(vy_tra_norm)
vy_val_norm = np.array(vy_val_norm)
vy_tst1_norm = np.array(vy_tst1_norm)
vy_tst2_norm = np.array(vy_tst2_norm)
vy_tst3_norm = np.array(vy_tst3_norm)

In [10]:
# Training arrays and the file names they are saved under (same order).
dataset_tra = [wd_tra_norm, vx_tra_norm, vy_tra_norm]
# No leading/trailing whitespace in the names: they are used verbatim as file names.
datalist_tra = ['wd_tra_norm', 'vx_tra_norm', 'vy_tra_norm']


# [wd_tra_norm, wd_val_norm, wd_tst1_norm, wd_tst2_norm ,wd_tst3_norm, 
#            vx_tra_norm, vx_val_norm, vx_tst1_norm, vx_tst2_norm, vx_tst3_norm, 
#            vy_tra_norm, vy_val_norm, vy_tst1_norm, vy_tst2_norm, vy_tst3_norm]
# dataset_val = 
# dataset_tst = 
# datalist = ['wd_tra_norm', 'wd_val_norm', 'wd_tst1_norm', 'wd_tst2_norm', 'wd_tst3_norm', 
#            'vx_tra_norm', 'vx_val_norm', 'vx_tst1_norm', 'vx_tst2_norm', 'vx_tst3_norm', 
#            'vy_tra_norm', 'vy_val_norm', 'vy_tst1_norm', 'vy_tst2_norm', 'vy_tst3_norm']

In [2]:
import os
import pathlib

In [11]:
# Output folder: <parent of the current working directory>/data/processed_data/
# NOTE(review): this rebinds `path`, which the data-loading cell uses for the
# raw-data folder string, and it points one level ABOVE the working directory
# while the raw data is read relative to the working directory -- confirm the
# intended project layout.
path = pathlib.Path().resolve()
proj_dir = str(path.parent)
# os.path.join avoids hand-written separators ("\d" and "\p" are invalid string
# escapes and backslashes only work on Windows); the trailing "" makes the
# result end with a separator so a file name can be appended directly.
save_path = os.path.join(proj_dir, "data", "processed_data", "")

print(save_path)

C:\Users\sagi8\Documents\GitHub\data\processed_data/


In [12]:
# Requires `dataset_tra` / `datalist_tra` and `save_path` from the cells above
# (run them first on a fresh kernel).
# Create the output folder if needed; np.savetxt does not create directories.
os.makedirs(save_path, exist_ok=True)
for ds, fname in zip(dataset_tra, datalist_tra):
    ds = np.asarray(ds)
    # np.savetxt only accepts 1-D or 2-D input. Arrays with more dimensions are
    # flattened over their leading axes, keeping the last axis as columns.
    ds_2d = ds.reshape(-1, ds.shape[-1]) if ds.ndim > 2 else ds
    np.savetxt(save_path+fname, ds_2d, delimiter=';')

NameError: name 'dataset_tra' is not defined

# Datasets and Data Loaders

In [12]:
# wdtrain_dataset = TensorDataset(torch.tensor(wd_tra, dtype=torch.float32))
# wdval_dataset = TensorDataset(torch.tensor(wd_val, dtype=torch.float32))
# wdtest_dataset = TensorDataset(torch.tensor(wd_tst, dtype=torch.float32))

# vxtrain_dataset = TensorDataset(torch.tensor(vx_tra, dtype=torch.float32))
# vxval_dataset = TensorDataset(torch.tensor(vx_val, dtype=torch.float32))
# vxtest_dataset = TensorDataset(torch.tensor(vx_tst, dtype=torch.float32))

# vytrain_dataset = TensorDataset(torch.tensor(vy_tra, dtype=torch.float32))
# vyval_dataset = TensorDataset(torch.tensor(vy_val, dtype=torch.float32))
# vytest_dataset = TensorDataset(torch.tensor(vy_tst, dtype=torch.float32))

In [13]:
# def convert_to_pyg(graph, pos, DEM, WD, VX, VY):
#     '''
#     Converts a graph or mesh into a PyTorch Geometric Data type 
#     Then, add position, DEM, and water variables to data object.
#     Adapted from https://github.com/RBTV1/SWE-GNN-paper-repository-/blob/main/database/graph_creation.py
#     '''
#     DEM = DEM.reshape(-1)

#     edge_index = torch.LongTensor(list(graph.edges)).t().contiguous()
#     row, col = edge_index

#     data = Data()

#     delta_DEM = torch.FloatTensor(DEM[col]-DEM[row])
#     coords = torch.FloatTensor(get_coords(pos))
#     edge_relative_distance = coords[col] - coords[row]
#     edge_distance = torch.norm(edge_relative_distance, dim=1)
#     edge_slope = delta_DEM/edge_distance

#     data.edge_index = edge_index
#     data.edge_distance = edge_distance
#     data.edge_slope = edge_slope
#     data.edge_relative_distance = edge_relative_distance

#     data.num_nodes = graph.number_of_nodes()
#     data.pos = torch.tensor(list(pos.values()))
#     data.DEM = torch.FloatTensor(DEM)
#     data.WD = torch.FloatTensor(WD.T)
#     data.VX = torch.FloatTensor(VX.T)
#     data.VY = torch.FloatTensor(VY.T)
        
#     return data