<img src="../assets/header_notebook.png" />
<hr style="color:#5A7D9F;">
<p align="center">
    <b style="font-size:2vw; color:#5A7D9F; font-weight:bold;">
    <center>ESA - Black Sea Deoxygenation Emulator</center>
    </b>
</p>
<hr style="color:#5A7D9F;">

In [None]:
# ----------
# Libraries
# ----------
import os
import sys
import cv2
import xarray
import random
import dawgz
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim

# Dawgz (jobs //)
from dawgz import job, schedule

# -------------------
# Libraries (Custom)
# -------------------
# Adding path to source folder to load custom modules
sys.path.insert(1, '../src/debs/')
sys.path.insert(1, '../scripts/')

# Loading libraries
from dataset              import BlackSea_Dataset
from dataloader           import BlackSea_Dataloader
from dataset_evolution    import BlackSea_Dataset_Evolution
from dataset_distribution import BlackSea_Dataset_Distribution
from tools                import *

# -------
# Jupyter
# -------
# Render matplotlib figures inline and enlarge the default font used in plots
%matplotlib inline
plt.rcParams.update({'font.size': 13})

# Making sure modules are reloaded when modified
%reload_ext autoreload
%autoreload 2

# Moving to the .py directory
# NOTE(review): the path is relative, so re-running this cell without restarting
# the kernel changes directory a second time, starting from ../src/debs/ —
# confirm this cell is only meant to be executed once per kernel session.
%cd ../src/debs/

<hr style="color:#5A7D9F;">
<p align="center">
    <b style="font-size:2vw; color:#5A7D9F; font-weight:bold;">
    <center>Scripts</center>
    </b>
</p>
<hr style="color:#5A7D9F;">

In [None]:
# Analyzing the data (1):
# Runs script_distribution.py through IPython's %run, passing the time window
# (years 0 to 0, months 1 to 2) as command-line flags.
# NOTE(review): `--dawgz False` is passed as the string "False"; if the script
# parses this flag with argparse `type=bool`, any non-empty string is truthy —
# confirm against the script's argument parser.
%run script_distribution.py --start_year        0 \
                            --end_year          0 \
                            --start_month       1 \
                            --end_month         2 \
                            --dawgz         False

In [None]:
# Analyzing the data (2):
# Runs script_evolution.py through IPython's %run with the same time window
# flags as the distribution analysis (years 0 to 0, months 1 to 2).
# NOTE(review): `--dawgz False` is passed as the string "False"; if the script
# parses this flag with argparse `type=bool`, any non-empty string is truthy —
# confirm against the script's argument parser.
%run script_evolution.py --start_year        0 \
                         --end_year          0 \
                         --start_month       1 \
                         --end_month         2 \
                         --dawgz         False

In [None]:
# Training:
# Runs script_training.py through IPython's %run on the time window given by the
# flags (years 0 to 0, months 1 to 2), with `temperature` as the only input.
# NOTE(review): `--dawgz False` is passed as the string "False"; if the script
# parses this flag with argparse `type=bool`, any non-empty string is truthy —
# confirm against the script's argument parser.
%run script_training.py  --start_year                 0 \
                         --end_year                   0 \
                         --start_month                1 \
                         --end_month                  2 \
                         --inputs           temperature \
                         --dawgz         False

<hr style="color:#5A7D9F;">
<p align="center">
    <b style="font-size:2vw; color:#5A7D9F; font-weight:bold;">
    <center>Playground</center>
    </b>
</p>
<hr style="color:#5A7D9F;">

In [None]:
# -----------------
#    Parameters
# -----------------
#
# Time window (forwarded to both BlackSea_Dataset instances below)
month_starting, month_ending = 1, 1
year_starting,  year_ending  = 0, 0

# Maximum depth observed for oxygen, what is left is masked [m]
# (Note: to observe only the continental shelf, set it to ~120m).
depth_max_oxygen = None

# ------------------
#  Loading the data
# ------------------
# Keyword arguments shared by the two datasets, which differ only by `variable`
time_window = dict(year_start  = year_starting,
                   year_end    = year_ending,
                   month_start = month_starting,
                   month_end   = month_ending)

# Datasets: "grid_T" feeds the physical fields, "ptrc_T" the biogeochemical ones
Dataset_physical = BlackSea_Dataset(variable = "grid_T", **time_window)
Dataset_bio      = BlackSea_Dataset(variable = "ptrc_T", **time_window)

# Loading the different field values (oxygen is the target, the others are inputs)
data_oxygen      = Dataset_bio.get_oxygen_bottom(depth = depth_max_oxygen)
data_temperature = Dataset_physical.get_temperature()
data_salinity    = Dataset_physical.get_salinity()
data_chlorophyll = Dataset_bio.get_chlorophyll()
data_kshort      = Dataset_bio.get_light_attenuation_coefficient_short_waves()
data_klong       = Dataset_bio.get_light_attenuation_coefficient_long_waves()

# Loading the black sea mask
BS_mask = Dataset_physical.get_blacksea_mask()

# --------------------
#  Preparing the data
# --------------------
# Input fields, in the order they are handed to the dataloader
input_fields = [data_temperature, data_salinity, data_chlorophyll, data_kshort, data_klong]

# Loading the dataloader
BSD_loader = BlackSea_Dataloader(
    x          = input_fields,
    y          = data_oxygen,
    mask       = BS_mask,
    mode       = "temporal",
    resolution = 64,
    window     = 7,
    window_oxy = 7,
)

# Retrieving the datasets (requested in the order train, validation, test)
ds_train, ds_validation, ds_test = (
    BSD_loader.get_dataloader(split) for split in ("train", "validation", "test")
)

In [32]:
from itertools import combinations, product

# Number of configurations printed as a preview; the full grid holds several
# thousand entries and printing all of them floods the notebook output.
PREVIEW_COUNT = 5

# Candidate input variables. Every non-empty subset of them is one value of the
# 'Inputs' axis of the grid. The subsets are built in this cell so that it runs
# on a fresh kernel: the only other cell defining `all_combinations` is located
# further down the notebook, which makes a top-to-bottom run fail with NameError.
input_list = ["temperature", "salinity", "chlorophyll", "kshort", "klong"]
all_combinations = [
    list(subset)
    for size in range(1, len(input_list) + 1)
    for subset in combinations(input_list, size)
]

# Each key is one hyper-parameter, each value the list of candidates to explore
arguments = {
    'month_start': [0],
    'month_end': [12],
    'year_start': [1],
    'year_end': [3],
    'Inputs': all_combinations,
    'Splitting': ["temporal", "spatial"],
    'Resolution': [64],
    'Window (Inputs)': [1, 3, 7, 14, 31],
    'Window (Output)': [7],
    'Architecture': ["FCNN"],
    'Learning Rate': [0.01, 0.001, 0.0001],
    'Kernel Size': [3, 5, 7, 9, 11],
    'Batch Size': [64]
}

# Generate all combinations (cartesian product of the candidate lists)
param_combinations = list(product(*arguments.values()))

# Create a list of dictionaries, one per configuration, keyed like `arguments`
param_dicts = [dict(zip(arguments.keys(), combo)) for combo in param_combinations]

# Show the size of the grid and only its first few entries
print(f"{len(param_dicts)} configurations generated, showing the first {min(PREVIEW_COUNT, len(param_dicts))}:")
for param_dict in param_dicts[:PREVIEW_COUNT]:
    print(param_dict)

{'month_start': 0, 'month_end': 12, 'year_start': 1, 'year_end': 3, 'Inputs': ['temperature'], 'Splitting': 'temporal', 'Resolution': 64, 'Window (Inputs)': 1, 'Window (Output)': 7, 'Architecture': 'FCNN', 'Learning Rate': 0.01, 'Kernel Size': 3, 'Batch Size': 64}
{'month_start': 0, 'month_end': 12, 'year_start': 1, 'year_end': 3, 'Inputs': ['temperature'], 'Splitting': 'temporal', 'Resolution': 64, 'Window (Inputs)': 1, 'Window (Output)': 7, 'Architecture': 'FCNN', 'Learning Rate': 0.01, 'Kernel Size': 5, 'Batch Size': 64}
{'month_start': 0, 'month_end': 12, 'year_start': 1, 'year_end': 3, 'Inputs': ['temperature'], 'Splitting': 'temporal', 'Resolution': 64, 'Window (Inputs)': 1, 'Window (Output)': 7, 'Architecture': 'FCNN', 'Learning Rate': 0.01, 'Kernel Size': 7, 'Batch Size': 64}
{'month_start': 0, 'month_end': 12, 'year_start': 1, 'year_end': 3, 'Inputs': ['temperature'], 'Splitting': 'temporal', 'Resolution': 64, 'Window (Inputs)': 1, 'Window (Output)': 7, 'Architecture': 'FCNN',

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


In [31]:
from itertools import combinations

# Variables that can be used as inputs
input_list = ["temperature", "salinity", "chlorophyll", "kshort", "klong"]

# Every non-empty subset of `input_list`, ordered by increasing size and stored
# as a list (itertools yields tuples)
all_combinations = [
    list(subset)
    for size in range(1, len(input_list) + 1)
    for subset in combinations(input_list, size)
]


['temperature']
['salinity']
['chlorophyll']
['kshort']
['klong']
['temperature', 'salinity']
['temperature', 'chlorophyll']
['temperature', 'kshort']
['temperature', 'klong']
['salinity', 'chlorophyll']
['salinity', 'kshort']
['salinity', 'klong']
['chlorophyll', 'kshort']
['chlorophyll', 'klong']
['kshort', 'klong']
['temperature', 'salinity', 'chlorophyll']
['temperature', 'salinity', 'kshort']
['temperature', 'salinity', 'klong']
['temperature', 'chlorophyll', 'kshort']
['temperature', 'chlorophyll', 'klong']
['temperature', 'kshort', 'klong']
['salinity', 'chlorophyll', 'kshort']
['salinity', 'chlorophyll', 'klong']
['salinity', 'kshort', 'klong']
['chlorophyll', 'kshort', 'klong']
['temperature', 'salinity', 'chlorophyll', 'kshort']
['temperature', 'salinity', 'chlorophyll', 'klong']
['temperature', 'salinity', 'kshort', 'klong']
['temperature', 'chlorophyll', 'kshort', 'klong']
['salinity', 'chlorophyll', 'kshort', 'klong']
['temperature', 'salinity', 'chlorophyll', 'kshort', 'k

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


<hr style="color:#5A7D9F;">
<p align="center">
    <b style="font-size:2vw; color:#5A7D9F; font-weight:bold;">
    <center>Testing</center>
    </b>
</p>
<hr style="color:#5A7D9F;">

In [None]:
# --------------------
#       TESTING
# --------------------
#
# Settings of the fake dataset used to check the dataloader
var_inputs = 2     # Number of input variables
win_in     = 2     # Window for the input
win_out    = 1     # Window for the oxygen
days       = 14    # Number of days

# --------------------
#      GENERATING
# --------------------
# Generating fake data (inputs first, then the oxygen field)
fake_data_physical_variables = generateFakeDataset(
    resolution          = 128,
    number_of_variables = var_inputs,
    number_of_samples   = days,
)
fake_data_physical_oxygen = generateFakeDataset(
    resolution          = 128,
    number_of_variables = 1,
    number_of_samples   = days,
    oxygen              = True,
)


def _make_fake_loader(mode):
    """Builds a BlackSea_Dataloader on the fake data for the given `mode`.

    Each call creates its own all-ones mask so the loaders never share one.
    NOTE(review): the mask is 258 x 258 while the fake data is generated with
    resolution = 128 — confirm this shape is the one the dataloader expects.
    """
    return BlackSea_Dataloader(
        x          = fake_data_physical_variables,
        y          = fake_data_physical_oxygen[0],
        mask       = np.ones(shape = (258, 258)),
        mode       = mode,
        resolution = 128,
        window     = win_in,
        window_oxy = win_out,
    )


# Creation of the dataloaders (one per mode)
BSD_loader_fake_spatial  = _make_fake_loader("spatial")
BSD_loader_fake_temporal = _make_fake_loader("temporal")


# OK
"""
for i in range(fake_data_physical_variables[0].shape[0]):

    plt.figure(figsize=(5, 5))
    plt.imshow(fake_data_physical_variables[1][i, :, :])
"""

# --------------------------------------------------------------------------------
#                                       SPATIAL
# --------------------------------------------------------------------------------
"""
for x, y in BSD_loader_fake_spatial.get_dataloader("train"):

    # Initial shapes
    print("Input shape: ", len(fake_data_physical_variables), fake_data_physical_variables[0].shape, "\nOutput shape: ", fake_data_physical_oxygen[0].shape)

    # Shapes (dataloader)
    print("Input shape: ", x.shape, "\nOutput shape: ", y.shape)

    # The total number of samples is
    # [Number of timesteps - number of input days (window) - number of output days (window_oxygen) ] * number of regions
    #
    # Tests
    #
    # Number of variables
    assert x.shape[1] == var_inputs * win_in

    # Number of outputs
    assert y.shape[1] == win_out

    # Number of samples (must be divided by 2 for the validation and test)
    assert x.shape[0] == (fake_data_physical_variables[0].shape[0] - win_in - win_out) * int(256/128)

    # Checking that I have all the timesteps
    #
    # Looping over all the time steps
    for i in range(x.shape[0]):

        # Showing as a subplots the input and output pairs
        plt.figure(figsize=(14, 14))

        for j in range(var_inputs * win_in):
            plt.subplot(1, var_inputs * win_in + win_out, j+1)

            # Removing labels and tickz
            plt.xticks([])
            plt.yticks([])
            plt.grid(False)
            plt.imshow(x[i, j, :, :])

        for j in range(win_out):
            plt.subplot(1, var_inputs * win_in + win_out, var_inputs * win_in + j+1)
            # Removing labels and tickz
            plt.xticks([])
            plt.yticks([])
            plt.grid(False)

            plt.imshow(y[i, j, :, :])

    # Idea check the validity using a trehsold to create a 1 0 matrix for comparison (otherwise it will  bug since you have normalized the data)

"""
# --------------------------------------------------------------------------------
#                                       TEMPORAL
# --------------------------------------------------------------------------------
# Each figure is a single row of panels: first every input channel
# (var_inputs * win_in of them), then every output channel (win_out of them).
n_input_panels = var_inputs * win_in
n_panels       = n_input_panels + win_out

for x, y in BSD_loader_fake_temporal.get_dataloader("train"):

    # Shapes of the fake data handed to the dataloader
    print("Input shape: ", len(fake_data_physical_variables), fake_data_physical_variables[0].shape, "\nOutput shape: ", fake_data_physical_oxygen[0].shape)

    # Shapes of the batch returned by the dataloader
    print("Input shape: ", x.shape, "\nOutput shape: ", y.shape)

    # Visual check: one figure per sample of the batch
    for i in range(x.shape[0]):

        plt.figure(figsize=(14, 14))

        # Input channels followed by output channels, in plotting order
        panels  = [x[i, j, :, :] for j in range(n_input_panels)]
        panels += [y[i, j, :, :] for j in range(win_out)]

        for position, panel in enumerate(panels, start = 1):
            plt.subplot(1, n_panels, position)

            # Removing labels and ticks
            plt.xticks([])
            plt.yticks([])
            plt.grid(False)

            plt.imshow(panel)


In [None]:
        # Storing all the information
        # Gathers the run settings read from `args` into one dictionary, keyed by
        # the human-readable names of each setting.
        # NOTE(review): this cell is an indented fragment and reads from `args`,
        # which no cell visible in this notebook defines — it looks pasted from a
        # script; confirm where it belongs before running it.
        arguments = {

            # Temporal Information
            'month_start'     : args.start_month,
            'month_end'       : args.end_month,
            'year_start'      : args.start_year,
            'year_end'        : args.end_year,

            # Datasets
            "Inputs"          : args.inputs,
            "Splitting"       : args.splitting,
            "Resolution"      : args.resolution,
            "Window (Inputs)" : args.windows_inputs,
            # NOTE(review): `winosws_outputs` looks like a typo (compare with
            # `windows_inputs` just above) — confirm against the argument parser.
            "Window (Output)" : args.winosws_outputs,

            # Training
            "Architecture"    : args.architecture,
            "Learning Rate"   : args.learning_rate,
            "Kernel Size"     : args.kernel_size,
            "Batch Size"      : args.batch_size

        }