In [1]:
import os
# Absolute path of the project checkout on the author's machine.
# NOTE(review): machine-specific; adjust before running elsewhere.
ROOT_DIR = "/home/ebr/projects/inundation-emulator"
# Make the project root the working directory so that relative paths used
# further down (e.g. the TensorBoard log directory) resolve against it.
os.chdir(ROOT_DIR)


In [None]:
from netCDF4 import Dataset
from collections import namedtuple
import random
from collections.abc import Generator
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt

In [3]:
# Directory holding the per-scenario NetCDF files; passed to DataReader as `datadir`.
DATA_DIR = '/home/ebr/data/PTHA2020_runs_UMA/train_164'
# NetCDF grid whose "z" variable DataReader loads as the topography.
TOPOFILE = '/home/ebr/data/PTHA2020_runs_UMA/Catania/C_CT.grd'
# Text file of whitespace-separated true/false tokens that DataReader parses
# into a boolean mask over the topography grid.
TOPO_MASK = '/home/ebr/data/PTHA2020_runs_UMA/Catania/ct_mask.txt'
# Disabled path definitions (not referenced anywhere in this notebook):
#GENERATED_DIR = os.path.join(ROOT_DIR, 'generated')
#SUMMARIES_TRAIN = os.path.join(DATA_DIR, 'train_test/train.txt')
#SUMMARIES_VAL = os.path.join(DATA_DIR, 'train_test/validate.txt')

In [None]:
from pprint import pprint

def inspect_nc_file(file_path):
    """Print the dimensions, variables and global attributes of a NetCDF file.

    Args:
        file_path: Path of the NetCDF file to open read-only.

    The file is opened through a context manager so the handle is released
    even when one of the print calls raises.
    """
    with Dataset(file_path, 'r') as dataset:
        # Header: which file is being described.
        print(f"File: {file_path}")
        print("Dimensions:")
        pprint(dataset.dimensions)

        print("\nVariables:")
        pprint(dataset.variables)

        # First the list of attribute names, then each name with its value.
        print("\nGlobal Attributes:")
        pprint(dataset.ncattrs())
        for attr in dataset.ncattrs():
            print(f"{attr}: {getattr(dataset, attr)}")

# Example usage: dump the structure of one scenario result file from the
# training directory.
file_path = '/home/ebr/data/PTHA2020_runs_UMA/train_164/1357_E02020N3739E02658N3366-PS-Mur_PYes_Var-M895_E02426N3465_S002_CT_10m.nc'
# Alternative target (a .grd grid file), currently disabled:
#file_path = "/home/ebr/data/PTHA2020_runs_UMA/Catania/A_CT.grd"
inspect_nc_file(file_path)


In [29]:
"""
Generator to create dataset.
"""
# One sample as yielded by DataReader.generator: the `eta` array first,
# the `flow_depth` array second (tuple order matters for tf.data signatures).
Scenario = namedtuple('Scenario', ('eta', 'flow_depth'))


class DataReader:
    """Serve (eta, flow_depth) samples read from per-scenario NetCDF files.

    Every non-blank line of ``scenarios_file`` names one scenario. For a
    scenario ``<name>`` two files are read from ``datadir``:

    * ``<name>_ts.nc``     -- variable ``eta``, indexed as ``[:, pois]``
    * ``<name>_CT_10m.nc`` -- variables ``max_height`` and ``deformation``

    Args:
        scenarios_file: Text file with one scenario name per line.
        pois: Index (or sequence of indices) applied to the second axis of
            the ``eta`` variable.
        datadir: Directory containing the scenario NetCDF files.
        topofile: NetCDF file whose ``z`` variable is the topography grid.
        topo_mask_file: Optional text file of whitespace-separated
            ``true``/``false`` tokens. When given, flow depth is returned
            only for the cells selected by the (transposed) mask.
        shuffle_on_load: Shuffle the scenario list every time it is (re)loaded.
        reload: Reload the scenario list when it runs empty, which makes
            :meth:`generator` endless.
    """

    def __init__(self, 
                 scenarios_file, 
                 pois, 
                 datadir, 
                 topofile,
                 topo_mask_file=None, 
                 shuffle_on_load=False, 
                 reload=False):
        
        self.scenarios_file = scenarios_file
        self.pois = pois
        self.datadir = datadir
        self.topofile = topofile
        self.topo_mask_file = topo_mask_file
        self.shuffle_on_load = shuffle_on_load
        self.reload = reload
        self.lines = None
        # Always define the attribute so that inspecting ``reader.topo_mask``
        # without a mask file yields None instead of raising AttributeError.
        self.topo_mask = None

        with Dataset(self.topofile, 'r') as ds:
            self.topography = ds.variables["z"][:,:]

        if self.topo_mask_file:
            self.topo_mask = self._read_topo_mask(self.topo_mask_file)

        self.load()

    @staticmethod
    def _read_topo_mask(path):
        """Parse a text grid of true/false tokens into a transposed bool array."""
        with open(path, 'r') as file:
            rows = file.readlines()

        # A token counts as True only if it spells "true" (case-insensitive).
        grid = np.array([
            [token.strip().lower() == 'true' for token in row.split()]
            for row in rows
        ], dtype=bool)

        return grid.T

    def load(self):
        """(Re)read the scenario list, dropping blank lines.

        Blank lines would otherwise turn into an empty scenario name and a
        bogus file name such as ``_CT_10m.nc`` in :meth:`get_sample`.
        """
        with open(self.scenarios_file, 'r') as file:
            self.lines = [line for line in file if line.strip()]
        if self.shuffle_on_load:
            random.shuffle(self.lines)

    def generator(self):
        """Yield one :class:`Scenario` per listed scenario.

        The list is reloaded at the start of every call so the generator can
        be restarted. Scenarios are taken from the end of the list. With
        ``reload=True`` the list is refilled as soon as it runs empty, so
        iteration never terminates.
        """
        self.load()
        while self.lines:
            line = self.lines.pop()

            # Refill before yielding so the loop condition stays true.
            if not self.lines and self.reload:
                self.load()

            flow_depth, eta, _ = self.get_sample(line.strip())
            yield Scenario(eta=eta, flow_depth=flow_depth)

    def get_sample(self, scenario):
        """Load the arrays belonging to one scenario.

        Args:
            scenario: Scenario name, i.e. the common file-name prefix.

        Returns:
            Tuple ``(flow_depth, eta, deformed_topography)`` where

            * ``flow_depth`` is ``max_height - deformed_topography`` on the
              selected cells and 0 elsewhere, reduced to the cells selected
              by ``topo_mask`` when a mask file was configured;
            * ``eta`` is the ``eta`` variable indexed with ``[:, pois]`` and
              then transposed;
            * ``deformed_topography`` is ``topography - deformation``.
        """
        filename_CT = os.path.join(self.datadir, f"{scenario}_CT_10m.nc")
        filename_ts = os.path.join(self.datadir, f"{scenario}_ts.nc")

        with Dataset(filename_ts) as ds:
            eta = ds.variables["eta"][:, self.pois]

        with Dataset(filename_CT) as ds:
            max_height = ds.variables["max_height"][:,:]
            deformation = ds.variables["deformation"][:,:]

        deformed_topography = self.topography - deformation

        # Cells where max_height actually holds a value. Asking for the mask
        # explicitly avoids comparing against the np.ma.masked constant, which
        # yields a fully masked array and only works through its raw data.
        has_value = ~np.ma.getmaskarray(max_height)
        # Selected cells: positive topography, a valid max_height, and
        # max_height above the deformed topography.
        selected = has_value & np.ma.getdata(
            (self.topography > 0) & (max_height > deformed_topography)
        )

        flow_depth = np.zeros(self.topography.shape)
        depth = np.ma.getdata(max_height - deformed_topography)
        flow_depth[selected] = depth[selected]

        if self.topo_mask_file:
            # Boolean indexing flattens the grid to the masked-in cells only.
            flow_depth = flow_depth[self.topo_mask]

        return flow_depth, eta.T, deformed_topography

    

In [30]:
# Indices 30..44 select which columns of the `eta` variable are read.
pois = range(30, 45)
n_pois = len(pois)

# Reader over the training scenarios; one pass, file order, no reshuffling.
reader = DataReader(
    datadir=DATA_DIR,
    topofile=TOPOFILE,
    topo_mask_file=TOPO_MASK,
    scenarios_file="/home/ebr/projects/inundation-emulator/training_set/scenarios.txt",
    pois=pois,
    reload=False,
    shuffle_on_load=False,
)

In [None]:
# Display the number of selected points (len(pois)).
n_pois

In [32]:

# Wrap the reader's generator as a tf.data pipeline.
# Both arrays are floating point (flow_depth is built with np.zeros, i.e.
# float64), so the signature must be a float dtype: with tf.int32 the
# elements are either rejected or truncated to integers.
dataset = tf.data.Dataset.from_generator(
        generator=reader.generator,
        output_signature=(
                # eta after transposition: (n_pois, 481)
                tf.TensorSpec(shape=(n_pois, 481), dtype=tf.float32),
                # flow depth on the masked-in cells; explicit 1-tuple shape
                tf.TensorSpec(shape=(int(reader.topo_mask.sum()),), dtype=tf.float32)
        )
)

In [None]:
# Smoke test: pull every element from the dataset once and print its index,
# which exercises the generator and the declared output signature.
for i,element in enumerate(dataset):
    print(i)

In [None]:
# Batch the dataset
batch_size = 30
batched_dataset = dataset.batch(batch_size)

# Iterate over the batched dataset and print the results.
# NOTE: `eta` and `flow_depth` stay bound to the LAST batch after the loop;
# the later `model(eta)` cell relies on that leftover `eta`.
for i, batch in enumerate(batched_dataset):
    eta, flow_depth = batch
    print(i, flow_depth.shape)
    

In [None]:
from keras import layers, models, Input, regularizers
# L2 regularisation factor applied to every Conv2D/Dense kernel in get_model().
reg = 1e-5

def get_model(input_shape=None, n_outputs=418908):
    """Build the encoder/decoder network as a single Sequential model.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
            Defaults to ``(n_pois, 481, 1)``, using the notebook-level
            ``n_pois`` at call time, which is what was hard-coded before.
        n_outputs: Width of the final Dense layer. Defaults to 418908, the
            previously hard-coded value.
            NOTE(review): presumably this must equal the number of True
            cells in the topography mask (``reader.topo_mask.sum()``);
            confirm, and pass it explicitly if the mask changes.

    Returns:
        ``models.Sequential([encoder, decoder])``. The encoder ends in a
        16-unit Dense layer; the decoder maps that to ``n_outputs`` values.
    """
    if input_shape is None:
        input_shape = (n_pois, 481, 1)

    # Encoder
    encoder = models.Sequential([
        Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu', strides=(1, 1), use_bias=False, kernel_regularizer=regularizers.l2(reg)),
        layers.MaxPooling2D(pool_size=(3, 3), strides=(1, 2)),
        layers.Conv2D(64, (3, 5), activation='relu', strides=(1, 1), use_bias=False, kernel_regularizer=regularizers.l2(reg)),
        layers.MaxPooling2D(pool_size=(3, 5), strides=(2, 3)),
        layers.Conv2D(128, (3, 5), activation='relu', strides=(1, 1), padding='same', use_bias=False, kernel_regularizer=regularizers.l2(reg)),
        layers.MaxPooling2D(pool_size=(3, 5), strides=(2, 3)),
        # 1x1 convolution reduces the channel count before flattening.
        layers.Conv2D(32, (1, 1), activation='relu', strides=(1, 1), use_bias=True, kernel_regularizer=regularizers.l2(reg)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(16, activation='relu', use_bias=True, kernel_regularizer=regularizers.l2(reg))
    ])
    
    # Decoder
    decoder = models.Sequential([
        layers.Dense(32, activation='relu', use_bias=True, kernel_regularizer=regularizers.l2(reg)),
        # ReLU output keeps predictions non-negative.
        layers.Dense(n_outputs, activation='relu', use_bias=True, kernel_regularizer=regularizers.l2(reg))
    ])
    
    # Complete Model
    model = models.Sequential([encoder, decoder])
    
    return model

# Instantiate the model with its default configuration and print its
# layer summary.
model = get_model()
model.summary()

In [None]:
# Layer 0 of the outer Sequential is the encoder sub-model built in get_model().
model.layers[0].summary()

In [None]:
# Forward-pass shape check. NOTE: `eta` is the last batch left over from the
# batching loop cell, so that cell must have been run first.
model(eta).shape

In [None]:

batch_size = 10  # Adjust to your requirements
dataset_size = 164
epochs = 10
steps_per_epoch = dataset_size // batch_size

# Create dataset from generator.
# eta and flow depth are floating-point arrays, so the signature uses
# tf.float32; a tf.int32 signature rejects or truncates them.
dataset = tf.data.Dataset.from_generator(
        generator=reader.generator,
        output_signature=(
                tf.TensorSpec(shape=(n_pois, 481), dtype=tf.float32),
                tf.TensorSpec(shape=(int(reader.topo_mask.sum()),), dtype=tf.float32)
        )
).cache().shuffle(buffer_size=dataset_size)
# cache() keeps the samples after the first pass so later epochs do not
# re-read the NetCDF files; shuffle() with a full-size buffer then reorders
# the cached samples.
batched_dataset = dataset.batch(batch_size)#.prefetch(tf.data.AUTOTUNE)

# Fresh, untrained model for this training run
model = get_model()

# Compile the model with a loss function and optimizer
model.compile(optimizer='adam', 
              loss="mse",  # Adjust as per your task (e.g., binary crossentropy or MSE)
              metrics=['mse'])

# Fit the model
model.fit(batched_dataset, epochs=epochs)#, steps_per_epoch=steps_per_epoch)


In [None]:
%tensorboard generated/emulator_20250117_094513/logs/fit