<img src="../assets/header_notebook.png" />
<hr style="color:#5A7D9F;">
<p align="center">
    <b style="font-size:2vw; color:#5A7D9F; font-weight:bold;">
    <center>ESA - Black Sea Deoxygenation Emulator</center>
    </b>
</p>
<hr style="color:#5A7D9F;">

In [None]:
# ----------
# Libraries
# ----------
import os
import sys
import cv2
import dawgz
import wandb
import xarray
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim

# Dawgz (jobs //)
from dawgz import job, schedule

# -------------------
# Libraries (Custom)
# -------------------
# Adding path to source folder to load custom modules
sys.path.append('/src')
sys.path.append('/src/debs/')
sys.path.insert(1, '/src/debs/')
sys.path.insert(1, '/scripts/')

# Moving to the .py directory
%cd src/debs/

## Loading libraries
from metrics     import *
from dataset     import *
from dataloader  import *
from tools       import *
from losses      import *

# -------
# Jupyter
# -------
%matplotlib inline
plt.rcParams.update({'font.size': 13})

# Making sure modules are reloaded when modified
%reload_ext autoreload
%autoreload 2

<hr style="color:#5A7D9F;">
<p align="center">
    <b style="font-size:2vw; color:#5A7D9F; font-weight:bold;">
    <center>SCRIPTS</center>
    </b>
</p>
<hr style="color:#5A7D9F;">

In [None]:
# Generating the preprocessed data
%run generate.py

In [None]:
# Training a neural network:
%run __training.py --config local

<hr style="color:#5A7D9F;">
<p align="center">
    <b style="font-size:2vw; color:#5A7D9F; font-weight:bold;">
    <center>Playground</center>
    </b>
</p>
<hr style="color:#5A7D9F;">

In [None]:
# -----------------
#    Parameters
# -----------------
#
# Time window forwarded to BlackSea_Dataset below (start/end month and start/end year)
month_starting = 1
month_ending   = 3
year_starting  = 1980
year_ending    = 1980

# ------------------
#  Loading the data
# ------------------
# Creating the dataset handle for the time window defined above
BSD_dataset = BlackSea_Dataset(year_start  = year_starting,
                               year_end    = year_ending,
                               month_start = month_starting,
                               month_end   = month_ending)

# Loading the days ID (used to give temporal information to the model)
days_ID = BSD_dataset.get_days()

# Loading the different candidate inputs.
# NOTE: only the temperature is handed to the dataloader further down; the other
# four fields are loaded here but not used anywhere else in this cell.
data_temperature   = BSD_dataset.get_data(variable = "temperature")
data_salinity      = BSD_dataset.get_data(variable = "salinity")
data_chlorophyll   = BSD_dataset.get_data(variable = "chlorophyll")
data_kshort        = BSD_dataset.get_data(variable = "kshort")
data_klong         = BSD_dataset.get_data(variable = "klong")

# Loading the output (the quantity to be predicted)
data_oxygen = BSD_dataset.get_data(variable = "oxygen")

# Loading spatial information
# NOTE(review): 256 x 576 is presumably the spatial resolution of the grid - confirm against get_mesh
bathy = BSD_dataset.get_depth(unit = "meter")
mesh  = BSD_dataset.get_mesh(x = 256, y = 576)

# Loading the Black Sea masks (without and with the continental shelf option), then
# deriving a third mask from the oxygen data and the continental shelf mask.
# NOTE(review): get_complete_mask is presumably provided by one of the custom star imports - confirm
bs_mask             = BSD_dataset.get_mask(continental_shelf = False)
bs_mask_with_depth  = BSD_dataset.get_mask(continental_shelf = True)
bs_mask_complete    = get_complete_mask(data_oxygen, bs_mask_with_depth)

# Hypoxia threshold, read as a scalar from the "HYPON" variable of the first dataset file
hypox_tresh = xarray.open_dataset(BSD_dataset.paths[0])["HYPON"].data.item()

# -----------------------
#  Preprocessing the data
# -----------------------
#
# Creation of the dataloader (regression mode, all three window sizes set to 1)
BSD_loader = BlackSea_Dataloader(x = [data_temperature],
                                 y = data_oxygen,
                                 t = days_ID,
                              mesh = mesh,
                              mask = bs_mask,
                   mask_with_depth = bs_mask_with_depth,
                        bathymetry = bathy,
                        window_inp = 1,
                        window_out = 1,
                    window_transfo = 1,
                              mode = "regression",
                  hypoxia_treshold = hypox_tresh)

# Retrieving the training, validation and test dataloaders
ds_train      = BSD_loader.get_dataloader("train")
ds_validation = BSD_loader.get_dataloader("validation")
ds_test       = BSD_loader.get_dataloader("test")

# Extracting the normalized oxygen threshold value (used later as threshold by the metrics)
norm_oxy = BSD_loader.get_normalized_deoxygenation_treshold()


In [None]:
# Peeking at the first training batch only, to display the shape of the inputs
first_batch = next(iter(ds_train), None)

if first_batch is not None:
    x, t, y = first_batch
    print(x.shape)

In [None]:
# -------------------------------------------------------
#
#        |
#       / \
#      / _ \                  ESA - PROJECT
#     |.o '.|
#     |'._.'|          BLACK SEA DEOXYGENATION EMULATOR
#     |     |
#   ,'|  |  |`.             BY VICTOR MANGELEER
#  /  |  |  |  \
#  |,-'--|--'-.|                2023-2024
#
#
# -------------------------------------------------------
#
# Documentation
# -------------
# A neural network definition to be used as temporal encoder
#
# Pytorch
import torch.nn as nn


class ENCODER(nn.Sequential):
    r"""Multi-layer perceptron turning the temporal information into normalized weights.

    Six linear layers (input_size -> 256 -> 256 -> 128 -> 64 -> 32 -> 1) with a GELU
    after each of the first five, followed by a softmax taken along dimension 0.

    NOTE(review): the softmax normalizes along the first axis of the input. If that
    axis is the batch axis, the weights of one sample depend on the other samples
    of the batch - confirm that this is intended.
    """

    def __init__(self, input_size : int):
        super().__init__()

        # Layer names and widths, created in this exact order so that the parameter
        # names and the random initialization sequence stay the same
        names  = ("linear_in", "linear_middle_1", "linear_middle_2",
                  "linear_middle_3", "linear_middle_4", "linear_out")
        widths = (input_size, 256, 256, 128, 64, 32, 1)

        for name, fan_in, fan_out in zip(names, widths[:-1], widths[1:]):
            setattr(self, name, nn.Linear(fan_in, fan_out))

        # Non-linearity shared by all the hidden layers
        self.activation = nn.GELU()

        # Normalization of the last layer output into weights (along dimension 0)
        self.softmax = nn.Softmax(dim = 0)

    def forward(self, x):
        r"""Returns the softmax-normalized output of the perceptron, last dimension of size 1"""

        hidden_layers = (self.linear_in,
                         self.linear_middle_1,
                         self.linear_middle_2,
                         self.linear_middle_3,
                         self.linear_middle_4)

        # Hidden layers, each followed by the activation
        for layer in hidden_layers:
            x = self.activation(layer(x))

        # Output layer (no activation), then normalization
        return self.softmax(self.linear_out(x))

    def count_parameters(self,):
        r"""Determines the number of trainable parameters in the model"""
        total = 0
        for parameter in self.parameters():
            if parameter.requires_grad:
                total += parameter.numel()
        return int(total)

class FCNN(nn.Sequential):
    r"""A fully convolutional neural network whose inputs are re-weighted by a temporal encoder.

    Arguments:
        problem               : name of the problem solved (only stored, e.g. "regression")
        inputs                : number of channels expected by the first convolution, i.e. it
                                must be equal to days * values * variables of the input of forward
        outputs               : number of outputs, the last convolution produces twice as many channels
        window_transformation : size of the last dimension of the temporal information given to the encoder
        kernel_size           : size of the convolution kernels (padding is kernel_size // 2)
        scaling               : multiplicative factor applied to the number of hidden channels
    """

    def __init__(self, problem: str, inputs: int, outputs: int, window_transformation: int = 1, kernel_size : int = 3, scaling : int = 1):
        super(FCNN, self).__init__()

        # Initialization
        self.n_in    = inputs
        self.problem = problem
        self.padding = kernel_size // 2

        # Number of output channels, i.e. times 2 because either mean and std for regression or both classes for classification
        self.n_out   = outputs * 2

        # ------ Architecture ------
        #
        # Temporal Encoder
        self.block_encoder = ENCODER(window_transformation)

        # Main Layers
        self.conv_init           = nn.Conv2d(self.n_in    , 256 * scaling, kernel_size, padding = self.padding)
        self.conv_intermediate_1 = nn.Conv2d(256 * scaling, 128 * scaling, kernel_size, padding = self.padding)
        self.conv_intermediate_2 = nn.Conv2d(128 * scaling,  64 * scaling, kernel_size, padding = self.padding)
        self.conv_intermediate_3 = nn.Conv2d( 64 * scaling,  32 * scaling, kernel_size, padding = self.padding)
        self.conv_final          = nn.Conv2d( 32 * scaling,    self.n_out, kernel_size, padding = self.padding)

        # Activation function
        self.activation = nn.GELU()

        # Normalization
        self.normalization_init           = nn.BatchNorm2d(self.conv_init.out_channels)
        self.normalization_intermediate_1 = nn.BatchNorm2d(self.conv_intermediate_1.out_channels)
        self.normalization_intermediate_2 = nn.BatchNorm2d(self.conv_intermediate_2.out_channels)
        self.normalization_intermediate_3 = nn.BatchNorm2d(self.conv_intermediate_3.out_channels)

    def forward(self, x, t):
        r"""Weights the input with the encoded time, then applies the convolutions.

        Arguments:
            x : input of shape (samples, days, values, variables, x_res, y_res)
            t : temporal information given to the encoder, whose squeezed output must
                be indexable as (samples, days)

        Returns:
            A tensor of shape (samples, n_out // 2, 2, x_res, y_res). The input x is left untouched.
        """

        # Retrieving dimensions (Ease of comprehension)
        samples, days, values, variables, x_res, y_res = x.shape

        # ----- Encoding Time -----
        #
        # Applying the encoder, i.e. one weight per (sample, day)
        weights = torch.squeeze(self.block_encoder(t), dim = -1)

        # Making the weights broadcastable against (samples, days, values, variables, x, y)
        weights = weights[:samples, :days, None, None, None, None]

        # Applying the weights to everything except the last three entries of the variables axis
        # (mesh and bathymetry). Each sample is multiplied by its OWN weights only, and
        # a new tensor is built so that the batch of the caller is never modified in place.
        x = torch.cat([x[:, :, :, :-3] * weights, x[:, :, :, -3:]], dim = 3)

        # Reshaping, i.e. days, values and variables are merged into the channels
        x = x.reshape(samples, days * values * variables, x_res, y_res)

        # ----- Fully Convolutional -----
        #
        x = self.normalization_init(self.activation(self.conv_init(x)))
        x = self.normalization_intermediate_1(self.activation(self.conv_intermediate_1(x)))
        x = self.normalization_intermediate_2(self.activation(self.conv_intermediate_2(x)))
        x = self.normalization_intermediate_3(self.activation(self.conv_intermediate_3(x)))
        x = self.conv_final(x)

        # Retrieving dimensions (Ease of comprehension)
        b, c, x_res, y_res = x.shape

        # Reshaping the output, i.e. (samples, outputs, 2, x, y)
        return x.reshape(b, self.n_out // 2, 2, x_res, y_res)

    def count_parameters(self,):
        r"""Determines the number of trainable parameters in the model"""
        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))

class AVERAGE(nn.Sequential):
    r"""A 'neural network' that predicts the pixel temporal average (should be used as a baseline).

    The prediction is computed once, at construction, from the first 60% of the time
    steps of the data and is then returned as is for every batch.

    Arguments:
        data_output : data to average, of shape (time, x, y)
        device      : device on which the prediction is returned by forward
        kwargs      : accepted for compatibility with the other models, currently not read

    NOTE(review): the regression prediction keeps the dtype of data_output, whereas the
    classification one is cast to float - confirm that this is what the loss expects.
    """

    def __init__(self, data_output : np.array, device : str, kwargs : dict):
        super(AVERAGE, self).__init__()

        # Settings (hard-coded, kwargs is not read)
        dataset_size     = [0.6, 0.3]
        problem          = "regression"
        hypoxia_treshold = 0.1

        # Retrieving dimensions (also checks that the data is three-dimensional)
        t, x, y = data_output.shape

        # Number of training samples
        train_samples = int(t * dataset_size[0])

        # ----- Regression ------
        if problem == "regression":

            # Determining the minimum and maximum values (NaNs are ignored)
            min_value = np.nanmin(data_output)
            max_value = np.nanmax(data_output)

            # Min-max normalization, i.e. shifting the minimum to 0 then scaling by the range
            normalized_data = (data_output - min_value) / (max_value - min_value)

            # Only the training time steps are used to build the prediction
            training_data = torch.from_numpy(normalized_data[: train_samples, :, :])

            # Predicting the average and the log of the variance
            mean_output    = torch.mean(training_data, dim = 0)
            log_var_output = torch.log(torch.var(training_data, dim = 0))

            # Stacking, i.e. (x, y) to (2, x, y)
            average_output = torch.stack([mean_output, log_var_output])

        # ----- Classification ------
        else:

            # Converting to classification
            average_output = torch.from_numpy((data_output[: train_samples, :, :] < hypoxia_treshold) * 1)

            # Summing over time, i.e. if total number of hypoxic days is greater than 50% of the time, then it is hypoxic
            average_output = (torch.sum(average_output, dim = 0) > train_samples // 2) * 1

            # Conversion to "probabilities", i.e. (x, y) to (c, x, y) with c = 0 no hypoxia, c = 1 hypoxia
            average_output = torch.stack([(average_output == 0) * 1, average_output]).float()

        # Storing information (outputs and bs must be set before calling process)
        self.outputs = 1
        self.bs      = 64
        self.average = self.process(average_output)

        # Using the requested device instead of always assuming that a GPU is available
        self.device  = device

        # Dummy feature (It plays no role whatsoever, it is just a placeholder to make the model work with the trainer)
        self.layer = nn.Conv2d(1, 1, 1)

    def forward(self, x, t):
        r"""Returns the stored prediction, with as many copies as there are samples in x"""

        batch = x.shape[0]

        if batch <= self.bs:
            prediction = self.average[:batch]
        else:
            # More samples than stored copies: every copy is identical, so one is expanded
            prediction = self.average[:1].expand(batch, *self.average.shape[1:])

        return to_device(prediction, self.device)

    def process(self, x : torch.Tensor):
        r"""Used to format the output to the correct shape, i.e. (bs, outputs, ...)"""

        # Adding number of forecasted days
        x = torch.unsqueeze(x, dim = 0) if self.outputs == 1 else \
            torch.stack([x for i in range(self.outputs)], dim = 0)

        # Adding batch size
        return torch.stack([x for i in range(self.bs)], dim = 0)

    def count_parameters(self,):
        r"""Determines the number of trainable parameters in the model (always 0, nothing is learned)"""
        return int(0)

In [None]:
# Model under test: the fully convolutional network, in regression mode
neural_network = FCNN(problem = "regression", inputs = 4, outputs = 1,
                      window_transformation = 1, kernel_size = 3, scaling = 1)

# Baseline alternative (temporal average), uncomment to swap the models:
#neural_network = AVERAGE(data_oxygen, "cuda", {})

# Adam optimizer acting on the parameters of the model
optimizer = optim.Adam(neural_network.parameters(), lr = 1e-3)

# Using the GPU if there is one, the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

neural_network.to(device)

In [None]:
# ------------------------------------------------
# Playground loop: one optimization step on the first training batch, then a full
# pass over the validation set followed by the computation of the metrics and plots.
#
# Displaying the results at the end of each epoch
show = True

for epoch in range(1):

    # Tool computing the metrics and plots, recreated for each epoch
    metrics_tool = BlackSea_Metrics(mode = "regression",
                                    mask = bs_mask_with_depth,
                           mask_complete = bs_mask_complete,
                                treshold = norm_oxy,
                       number_of_samples = BSD_loader.get_number_of_samples("validation"))

    # ----- Training -----
    #
    # Training mode, i.e. the normalization layers use the statistics of the batch
    neural_network.train()

    for x, t, y in ds_train:

        x, t, y = x.to(device), t.to(device), y.to(device)
        prediction = neural_network(x, t)

        # NOTE(review): the loss is given device = "cpu" although the tensors live on `device` - confirm
        loss_training = compute_loss(y_pred = prediction, y_true = y, mask = bs_mask_with_depth, problem = "regression", device = "cpu", kwargs = {})
        print(f"E{epoch} - Loss (Training):", loss_training.item())
        optimizer.zero_grad()
        loss_training.backward()
        optimizer.step()

        # Cleaning
        del x, t, y, prediction, loss_training
        torch.cuda.empty_cache()

        # Only the first batch is used (playground)
        break

    # ----- Validation -----
    #
    # Evaluation mode, i.e. the normalization layers use their running statistics and
    # are no longer updated by the validation batches
    neural_network.eval()

    with torch.no_grad():

        # Stores all the predictions for the metrics (plots)
        prediction_all = None

        for x, t, y in ds_validation:

            # Making prediction
            x, t, y = x.to(device), t.to(device), y.to(device)
            prediction = neural_network(x, t)
            loss_validation = compute_loss(y_pred = prediction, y_true = y, mask = bs_mask_with_depth, problem = "regression", device = "cpu", kwargs = {})
            print(f"E{epoch} - Loss (Validation):", loss_validation.item())
            x, t, y, prediction = x.to("cpu"), t.to("cpu"), y.to("cpu"), prediction.to("cpu")

            # Disabled per-batch plots, kept as a string so that they can be re-enabled quickly
            """
            # Plotting mean against ground truth in a subplot
            if show:

                # Highlighting hypoxic areas
                y_hyp = ( y < norm_oxy ) * 1.0
                p_hyp = ( prediction < norm_oxy ) * 1.0

                # Hiding non-obserable areas
                p_hyp[:,:,:, y[0, 0, 0] == -1] = torch.nan
                y_hyp[:,:,:, y[0, 0, 0] == -1] = torch.nan


                plt.figure(figsize = (20, 20))
                plt.subplot(1, 3, 1)
                plt.imshow(torch.flipud(y_hyp[0, 0, 0]))
                plt.subplot(1, 3, 2)
                plt.imshow(torch.flipud(p_hyp[0, 0, 0]))
                plt.subplot(1, 3, 3)
                plt.imshow(torch.flipud(y_hyp[0, 0, 0]) - torch.flipud(p_hyp[0, 0, 0]))
                plt.setp(plt.gcf().get_axes(), xticks = [], yticks = [])
                plt.subplot(1, 3, 1).set_title("Ground Truth", fontsize = 6)
                plt.subplot(1, 3, 2).set_title("Prediction", fontsize = 6)
                plt.subplot(1, 3, 3).set_title("Difference", fontsize = 6)
                plt.show()

                prediction[:,:,:, y[0,0,0,:,:] == -1] = torch.nan
                y[:,:,:, y[0,0,0,:,:] == -1] = torch.nan

                plt.figure(figsize = (20, 20))
                plt.subplot(1, 3, 1)
                plt.imshow(torch.flipud(y[0, 0, 0]),  vmin = 0, vmax = 1)
                plt.subplot(1, 3, 2)
                plt.imshow(torch.flipud(prediction[0, 0, 0]), vmin = 0, vmax = 1)
                plt.subplot(1, 3, 3)
                plt.imshow(torch.flipud(torch.exp(prediction[0, 0, 1]/2)), vmin = 0, vmax = 1)
                plt.setp(plt.gcf().get_axes(), xticks = [], yticks = [])
                plt.subplot(1, 3, 1).set_title("Ground Truth", fontsize = 6)
                plt.subplot(1, 3, 2).set_title("Prediction (Mean)", fontsize = 6)
                plt.subplot(1, 3, 3).set_title("Prediction (Std)", fontsize = 6)
                plt.show()

            """

            # Concatenating all the predictions
            prediction_all = torch.cat((prediction_all, prediction), dim = 0) if prediction_all is not None else prediction

            # The plots are made once after the loop, when all the predictions are available.
            # Plotting here used the ground truth before it was defined (NameError in a fresh
            # kernel) and compared partial predictions with the complete ground truth.

            del x, t, y, prediction
            torch.cuda.empty_cache()

    # Metrics, computed on the whole validation set
    y_vall_all = torch.from_numpy(BSD_loader.y_validation)
    metrics_tool.compute_metrics(y_pred = prediction_all, y_true = y_vall_all)
    #metrics_tool.compute_plots_comparison_regression(y_pred = prediction_all, y_true = y_vall_all)
    metrics_tool.compute_plots(  y_pred = prediction_all, y_true = y_vall_all)

    # Getting the results
    if show:
        results, results_name = metrics_tool.get_results()
        for r, n in zip(results[0], results_name):
            print(n, " : ", r)
        print("\n")
