In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [3]:
# Helpful
import tqdm

# Visualization
import matplotlib.pyplot as plt

# My Methods
from src.utils.CRPS import *
from src.utils.data_split import *
from src.models.EMOS import *
from src.models.EMOS_global.EMOS_global_load_score import *
from src.models.EMOS_global.EMOS_global_load_model import *
import data.raw.load_data_raw as ldr
import data.processed.load_data_processed as ldp
import data.processed.load_data_processed_denormed as ldpd
from src.models.CRPS_baseline.CRPS_load import *

<IPython.core.display.Javascript object>

### 1. Load Raw Data

### 3. Implementation:

In [16]:
def EMOS_local_train(var_num, lead_time, batch_size=4096, epochs=10, lr=0.001, validation_split=0.2, optimizer="Adam"):
    """
    Train one local EMOS model per grid point for a specific variable and lead_time.

    For every (lat, lon) index pair of the 120 x 130 grid a fresh network is built,
    fitted on the series of that single grid point, and checkpointed (best
    ``val_loss`` only) to
    ``EMOS_loc_<var>_lead_time_<lead_time>_<lat>_<lon>_denormed.h5``.
    Nothing is returned; the checkpoint files are the result of the call.

    Args:
        var_num (integer): number between 0 - 5 for each of the variables["u10", "v10", "t2m", "t850", "z500", "ws10"]
        lead_time (integer): number between 0 - 30 for each lead_time. The data is
            read at position ``lead_time + 1`` of the ``lead_time`` dimension, while
            the file name carries the value exactly as passed in.
        batch_size (integer): forwarded to ``fit``.
        epochs (integer): forwarded to ``fit``; early stopping (patience 3 on
            ``val_loss``) may end training sooner.
        lr (float): forwarded to ``build_EMOS_network_keras``.
        validation_split (float): forwarded to ``fit``.
        optimizer (str): forwarded to ``build_EMOS_network_keras``.
    """
    # Function-scope import so the block does not rely on `tf` leaking in
    # through one of the notebook's star imports.
    import tensorflow as tf

    # Grid extent covered by the local models
    n_lat, n_lon = 120, 130

    # Position along the lead_time dimension
    # NOTE(review): presumably index 0 holds the initial time, hence the +1 - confirm
    lead_index = lead_time + 1

    # Define the cost function depending on the variable number
    crps = crps_cost_function_trunc if var_num in [5] else crps_cost_function

    # Define the names of the variables
    var_names = ["u10", "v10", "t2m", "t850", "z500", "ws10"]

    # Load the training data for the chosen variable
    train_var_denormed = ldpd.load_data_all_train_proc_denorm()[var_num]

    # The key lookup and the lead_time selection do not depend on the grid point,
    # so they are done once instead of 120 * 130 times.
    data_var_keys = list(train_var_denormed.data_vars.keys())
    X_lead = train_var_denormed[data_var_keys[0]].isel(lead_time=lead_index)
    y_lead = train_var_denormed[data_var_keys[1]].isel(lead_time=lead_index)

    for lat in range(n_lat):
        for lon in range(n_lon):
            # Split the data into features and target for this grid point
            X_train_var_denormed = X_lead.isel(lat=lat, lon=lon)
            y_train_var_denormed = y_lead.isel(lat=lat, lon=lon)

            # Drop the Keras global state left behind by the previous grid point;
            # without this, memory grows with every one of the 15,600 models.
            tf.keras.backend.clear_session()

            # Build and compile the model
            EMOS_loc = build_EMOS_network_keras(
                compile=True, lr=lr, loss=crps, optimizer=optimizer
            )

            # File the best model of this grid point is written to
            model_filename = f"/home/dchen/BA_CH_EN/models/EMOS_local_models/denormed/EMOS_loc_{var_names[var_num]}_lead_time_{lead_time}_{lat}_{lon}_denormed.h5"

            # Define callbacks for early stopping and model checkpointing
            early_stopping = EarlyStopping(monitor="val_loss", patience=3)
            model_checkpoint = ModelCheckpoint(
                model_filename, monitor="val_loss", mode="min", save_best_only=True
            )

            # Fit the model to the training data; the two inputs are the two
            # entries of the mean_std dimension of the feature array.
            EMOS_loc.fit(
                [
                    X_train_var_denormed.isel(mean_std=0).values.flatten(),
                    X_train_var_denormed.isel(mean_std=1).values.flatten(),
                ],
                y_train_var_denormed.values.flatten(),
                batch_size=batch_size,
                epochs=epochs,
                validation_split=validation_split,
                callbacks=[early_stopping, model_checkpoint],
            )

<IPython.core.display.Javascript object>

In [17]:
def EMOS_local_load_model_var_lead(var_name, lead_time):
    """
    Collect the saved local EMOS models of one variable and lead time in a lat x lon grid.

    Args:
        var_name (str): The variable name used in the model files.
        lead_time (int): The file name is built from ``lead_time - 1``, so pass a
            value one higher than the lead time that appears in the file name.

    Returns:
        list: A 2D list (120 x 130), indexed ``[lat][lon]``, holding the loaded
        model for every grid point whose file exists and ``None`` otherwise.
    """
    model_dir = "/home/dchen/BA_CH_EN/models/EMOS_local_models/denormed/"

    grid = []
    for lat in range(120):
        row = []
        for lon in range(130):
            # Expected location of the checkpoint for this grid point
            candidate = os.path.join(
                model_dir,
                f"EMOS_loc_{var_name}_lead_time_{lead_time - 1}_{lat}_{lon}_denormed.h5",
            )

            # Grid points without a checkpoint keep a None placeholder
            if not os.path.isfile(candidate):
                row.append(None)
                continue

            # The CRPS losses are custom, so Keras needs them to rebuild the model
            row.append(
                tf.keras.models.load_model(
                    candidate,
                    custom_objects={
                        "crps_cost_function": crps_cost_function,
                        "crps_cost_function_trunc": crps_cost_function_trunc,
                    },
                )
            )
        grid.append(row)
    return grid

<IPython.core.display.Javascript object>

In [14]:
def EMOS_global_train(
    var_num,
    lead_time,
    batch_size=4096,
    epochs=10,
    lr=0.001,
    validation_split=0.2,
    optimizer="Adam",
):
    """
    Fit one global EMOS network for a variable at a lead time and return the best checkpoint.

    The network is trained on the flattened data of the selected lead time, the
    epoch with the lowest ``val_loss`` is written to disk, and that file is
    loaded again and returned.

    Args:
        var_num (integer): number between 0 - 5 for each of the variables["u10", "v10", "t2m", "t850", "z500", "ws10"]
        lead_time (integer): number between 0 - 30 for each lead_time
        batch_size (integer): forwarded to ``fit``.
        epochs (integer): forwarded to ``fit``; early stopping (patience 3 on
            ``val_loss``) may end training sooner.
        lr (float): forwarded to ``build_EMOS_network_keras``.
        validation_split (float): forwarded to ``fit``.
        optimizer (str): forwarded to ``build_EMOS_network_keras``.

    Returns:
        The model reloaded from the checkpoint file.
    """
    # The data is read one position further along the lead_time dimension
    lead_index = lead_time + 1

    variable_labels = ["u10", "v10", "t2m", "t850", "z500", "ws10"]

    # Training data of the requested variable
    dataset = ldpd.load_data_all_train_proc_denorm()[var_num]

    # First data variable holds the features, second one the target
    features = dataset[list(dataset.data_vars.keys())[0]].isel(lead_time=lead_index)
    targets = dataset[list(dataset.data_vars.keys())[1]].isel(lead_time=lead_index)

    # Variable 5 uses the truncated CRPS, every other variable the plain one
    loss_fn = crps_cost_function_trunc if var_num in [5] else crps_cost_function

    network = build_EMOS_network_keras(
        compile=True, lr=lr, loss=loss_fn, optimizer=optimizer
    )

    # Checkpoint location; the file name carries the lead time as passed in
    checkpoint_path = "".join(
        [
            "/home/dchen/BA_CH_EN/models/EMOS_global_models/denormed/EMOS_glob_",
            variable_labels[var_num],
            "_lead_time_",
            str(lead_index - 1),
            "_denormed.h5",
        ]
    )

    # Stop after 3 epochs without improvement and keep only the best epoch on disk
    training_callbacks = [
        EarlyStopping(monitor="val_loss", patience=3),
        ModelCheckpoint(
            checkpoint_path, monitor="val_loss", mode="min", save_best_only=True
        ),
    ]

    # The two network inputs are the two entries of the mean_std dimension
    mean_std_0 = features.isel(mean_std=0).values.flatten()
    mean_std_1 = features.isel(mean_std=1).values.flatten()
    observations = targets.values.flatten()

    network.fit(
        [mean_std_0, mean_std_1],
        observations,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=validation_split,
        callbacks=training_callbacks,
    )

    # Hand back the best epoch, not the last one
    return tf.keras.models.load_model(
        checkpoint_path,
        custom_objects={
            "crps_cost_function": crps_cost_function,
            "crps_cost_function_trunc": crps_cost_function_trunc,
        },
    )

<IPython.core.display.Javascript object>