In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import os
from urllib.request import urlretrieve

from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

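# PyTorch Geometric's `Data` container is used by `convert_to_pyg` below (following Roberto's code); the import is currently disabled, so enable it before calling that function.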
# from torch_geometric.data import Data

# Load the data

In [3]:
# Identifier of the simulation to load; it is interpolated into the DEM_{sim}.txt and WD_{sim}.txt file names.
sim = 42
# Side length of the square grid; the loaded arrays are reshaped to (number_grids, number_grids).
number_grids = 64

In [4]:
# Load the DEM and WD (presumably water depth -- confirm) files of the selected simulation
save_folder = "data/raw_datasets/"

# Build the paths with os.path.join: a literal "\\" is a directory separator only on
# Windows, so the hand-written separators could not be resolved on Linux/macOS.
dem_path = os.path.join(save_folder, "DEM", f"DEM_{sim}.txt")
wd_path = os.path.join(save_folder, "WD", f"WD_{sim}.txt")

# Third column of the DEM file, laid out as a (number_grids, number_grids) grid
topo = np.loadtxt(dem_path)[:, 2].reshape(number_grids, number_grids)
# WD values as a stack of (number_grids, number_grids) grids; -1 lets NumPy infer the stack length
vals = np.loadtxt(wd_path).reshape(-1, number_grids, number_grids)

In [5]:
# Render the grid loaded from the DEM file; NumPy abbreviates the middle of large arrays with "...".
display(topo)

array([[ 0.     ,  0.03362,  0.02552, ..., -0.07375,  0.05459,  0.14767],
       [-0.12979, -0.09082, -0.06958, ...,  0.00577,  0.13852,  0.23505],
       [-0.16733, -0.1261 , -0.08684, ...,  0.09362,  0.22339,  0.31521],
       ...,
       [ 0.39018,  0.38217,  0.29131, ..., -0.48051, -0.41116, -0.25394],
       [ 0.40054,  0.34883,  0.25079, ..., -0.47472, -0.39348, -0.21938],
       [ 0.30491,  0.2184 ,  0.13165, ..., -0.44601, -0.37985, -0.21325]])

In [None]:
def convert_to_pyg(graph, pos, DEM, WD, VX, VY):
    '''
    Build a PyTorch Geometric Data object from a graph or mesh.

    Edge attributes (index, distance, slope and relative distance) are derived
    from the graph edges, the node coordinates and the DEM. The node positions,
    the flattened DEM and the transposed water variables WD, VX and VY are
    stored on the same object, which is then returned.
    Adapted from https://github.com/RBTV1/SWE-GNN-paper-repository-/blob/main/database/graph_creation.py
    '''
    # One DEM value per node, as a flat vector
    DEM = DEM.reshape(-1)

    # Edge list transposed so that the two endpoints of every edge sit in separate rows
    edges = torch.LongTensor(list(graph.edges)).t().contiguous()
    start, end = edges

    data = Data()

    # DEM difference and spatial offset between the two endpoints of each edge
    dem_change = torch.FloatTensor(DEM[end] - DEM[start])
    node_xy = torch.FloatTensor(get_coords(pos))
    offset = node_xy[end] - node_xy[start]
    length = torch.norm(offset, dim=1)
    slope = dem_change / length

    # Edge-level attributes
    data.edge_index = edges
    data.edge_distance = length
    data.edge_slope = slope
    data.edge_relative_distance = offset

    # Node-level attributes
    data.num_nodes = graph.number_of_nodes()
    data.pos = torch.tensor(list(pos.values()))
    data.DEM = torch.FloatTensor(DEM)
    data.WD = torch.FloatTensor(WD.T)
    data.VX = torch.FloatTensor(VX.T)
    data.VY = torch.FloatTensor(VY.T)

    return data

# Normalize the data

In [7]:
def scale_sequences(X,scaler=None,scaler_type='standard'):
    """
    Scale an array with a scikit-learn style scaler, treating every entry as one feature.

    X is flattened to a single column before scaling and reshaped back to its
    original shape afterwards, so one set of statistics is shared by all elements.
    Adapted from exercise notebook on drinking water demand.

    Parameters
    ----------
    X : array exposing ``shape`` and ``reshape``
        Values to scale.
    scaler : object with a ``transform`` method, optional
        Already fitted scaler to apply. When None, a new scaler is created and fitted on X.
    scaler_type : {'standard', 'minmax'}
        Kind of scaler to create; only consulted when ``scaler`` is None.

    Returns
    -------
    X_scaled
        When ``scaler`` was supplied.
    (X_scaled, scaler)
        When the scaler was created here, so that it can be reused on other data.

    Raises
    ------
    ValueError
        If a new scaler is needed and ``scaler_type`` is not supported.
    """
    Xshape = X.shape

    # Compare against None explicitly: relying on truthiness would silently discard
    # a supplied scaler whose class happens to evaluate as False.
    if scaler is not None:
        return scaler.transform(X.reshape(-1, 1)).reshape(Xshape)

    if scaler_type == 'standard':
        scaler = StandardScaler()
    elif scaler_type == 'minmax':
        scaler = MinMaxScaler()
    else:
        raise ValueError("Type of scikit-learn scaler not supported. Choose 'standard' or 'minmax'.")

    X = scaler.fit_transform(X.reshape(-1, 1)).reshape(Xshape)
    return X, scaler

In [None]:
# The original row indices are split together with X and Y, so every sample
# in each subset can be traced back to its position in the full dataset.
X_tra, X_tst, Y_tra, Y_tst, ix_tra, ix_tst = train_test_split(
    X,
    Y,
    np.arange(X.shape[0]),
    test_size=0.30,
    shuffle=True,
    random_state=42,
)

# The 30% hold-out is halved: one half becomes the validation set, the other the final test set
X_val, X_tst, Y_val, Y_tst, ix_val, ix_tst = train_test_split(
    X_tst,
    Y_tst,
    ix_tst,
    test_size=0.5,
    shuffle=True,
    random_state=42,
)

# Report the shapes of the three subsets, one line per subset
print(
    f"X_tra.shape: {X_tra.shape}, Y_tra.shape: {Y_tra.shape}",
    f"X_val.shape: {X_val.shape}, Y_val.shape: {Y_val.shape}",
    f"X_tst.shape: {X_tst.shape}, Y_tst.shape: {Y_tst.shape}",
    sep="\n",
)

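The following block is from the land-use CNN. It relies on names that do not appear to be defined in the cells above (`dataset_folder`, `train_paths`, `tqdm`, `Image`, `transforms`), so they must be defined or imported before running it.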

In [None]:
def load_images(file_paths, transform, folder_path=os.path.join(dataset_folder, 'Images/Images/')):
    """
    Load image files, convert them to RGB, transform them and stack the results.

    Parameters
    ----------
    file_paths : iterable of str
        File names relative to ``folder_path``.
    transform : callable
        Applied to every RGB image; its outputs are passed to ``torch.stack``,
        so they are expected to be equally shaped tensors.
    folder_path : str
        Directory containing the images. The default is evaluated once, when this
        function is defined, so ``dataset_folder`` must already exist at that point.

    Returns
    -------
    torch.Tensor
        The transformed images stacked along a new first dimension.
    """
    images = []
    for file_path in tqdm(file_paths, desc='Loading images'):
        # os.path.join adds a separator only when one is missing, so folder_path
        # works with or without a trailing slash (plain concatenation did not).
        with Image.open(os.path.join(folder_path, file_path)) as img:
            # Force three channels so that every image gets the same basic treatment
            rgb_image = img.convert('RGB')
            images.append(transform(rgb_image))
    return torch.stack(images)

def calculate_mean_std(stacked_images):
    """
    Compute per-channel mean and standard deviation of a stack of images.

    Dimension 0 of ``stacked_images`` indexes the images and dimension 1 the
    channels; all remaining dimensions (height and width) are flattened. The
    statistics are computed for every image and channel separately and then
    averaged over the images. The returned std is therefore the average of the
    per-image standard deviations, not the std of all pixels pooled together.

    Returns
    -------
    (mean, std) : tuple of torch.Tensor
        Two tensors with one entry per channel.
    """
    n_images, n_channels = stacked_images.size(0), stacked_images.size(1)
    # reshape (unlike view) also accepts non-contiguous inputs such as transposed
    # tensors; the flattened tensor is built once and shared by both statistics.
    flat_pixels = stacked_images.reshape(n_images, n_channels, -1)
    mean = flat_pixels.mean(dim=2).mean(dim=0)
    std = flat_pixels.std(dim=2).mean(dim=0)
    return mean, std

# Minimal preprocessing for loading the training set: resize to 256x256, then convert to a tensor
basic_transform = transforms.Compose(
    [transforms.Resize((256, 256)), transforms.ToTensor()]
)

# Training images, loaded without normalization so that their statistics can be measured
original_train_images = load_images(train_paths, basic_transform)

# Per-channel statistics of the training images
mean, std = calculate_mean_std(original_train_images)

# Normalization transform parameterized with those statistics
normalize_transform = transforms.Normalize(mean=mean, std=std)

# Normalize the training images one by one and stack them back into a single tensor
original_train_images = torch.stack(
    list(map(normalize_transform, original_train_images))
)