In [46]:
%load_ext nb_black

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [47]:
# Basics
import tensorflow as tf
import xarray as xr

# Helpful
import tqdm

# Visualization
import matplotlib.pyplot as plt

# My Methods
from src.utils.CRPS import *
from src.utils.data_split import *
from src.models.EMOS import *
from src.models.EMOS_global.EMOS_global_load_score import *
from src.models.EMOS_global.EMOS_global_load_model import *
import data.raw.load_data_raw as ldr
import data.processed.load_data_processed as ldp
import data.processed.load_data_processed_denormed as ldpd
from src.models.CRPS_baseline.CRPS_load import *

<IPython.core.display.Javascript object>

### 1. Load Data

In [48]:
# Load the train and test sets via the project's `ldpd` loader module
# (data.processed.load_data_processed_denormed).
# NOTE(review): presumably these are the de-normalised variants of the
# processed data — confirm against the loader implementation.
dat_train_denorm = ldpd.load_data_all_train_proc_denorm()
dat_test_denorm = ldpd.load_data_all_test_proc_denorm()

<IPython.core.display.Javascript object>

### 2. Data Split

In [49]:
# Split the training data into two collections (named X and y here).
# NOTE(review): judging by the function name and the [0][0] indexing used in
# later cells, the result is presumably nested per variable and lead time —
# confirm in split_var_lead.
dat_X_lead_all_denorm, dat_y_lead_all_denorm = split_var_lead(dat_train_denorm)

<IPython.core.display.Javascript object>

### 3. Data preparation

In [56]:
# Inspect the first entry ([0][0]) of the split X data.
dat_X_lead_all_denorm[0][0]

<IPython.core.display.Javascript object>

In [50]:
def flatten_with_grid_ids(da):
    """
    Flatten an xarray DataArray and generate the matching grid point IDs.

    Every (lat, lon) pair gets a 1-based ID, numbered row-major over the grid
    (``id = lat_index * n_lon + lon_index + 1``). The IDs are broadcast over
    all remaining dimensions, so the result lines up with the flattened values
    for any dimension order and any number of extra dimensions.

    Args:
        da (xarray.DataArray): The DataArray to flatten. Must have 'lat' and
            'lon' dimensions.

    Returns:
        A tuple (flattened_values, grid_ids), where:
            - flattened_values (numpy.ndarray): A 1D array with all values from
              the DataArray, in the DataArray's own (C-order) layout.
            - grid_ids (numpy.ndarray): A 1D integer array of the same length
              with the grid point ID of each value.

    Raises:
        KeyError: If 'lat' or 'lon' is not a dimension of ``da``.
    """
    # Sizes of the two spatial dimensions (raises KeyError if one is missing)
    n_lat = da.sizes["lat"]
    n_lon = da.sizes["lon"]

    # Locate the spatial axes instead of assuming a fixed dimension order
    dims = tuple(da.dims)
    lat_axis = dims.index("lat")
    lon_axis = dims.index("lon")

    values = da.values

    # Index vectors shaped so that they broadcast along their own axis only
    lat_shape = [1] * values.ndim
    lat_shape[lat_axis] = n_lat
    lon_shape = [1] * values.ndim
    lon_shape[lon_axis] = n_lon
    lat_idx = np.arange(n_lat).reshape(lat_shape)
    lon_idx = np.arange(n_lon).reshape(lon_shape)

    # 1-based, row-major grid point ID for each (lat, lon) pair
    grid_id = lat_idx * n_lon + lon_idx + 1

    # Expand the IDs to the full array shape (a view, no copy) and flatten both;
    # flatten() returns independent 1D copies in the same element order
    grid_ids = np.broadcast_to(grid_id, values.shape).flatten()
    flattened_values = values.flatten()

    return flattened_values, grid_ids

<IPython.core.display.Javascript object>

In [51]:
# Flatten the first X entry and get one grid point ID per flattened value.
flattened_values, grid_ids = flatten_with_grid_ids(dat_X_lead_all_denorm[0][0])

<IPython.core.display.Javascript object>

In [52]:
# Peek at the flattened values (1D array).
flattened_values

array([-0.463738 ,  2.675326 , -1.2450967, ...,  2.4402175,  1.4866699,
        2.6078432], dtype=float32)

<IPython.core.display.Javascript object>

In [54]:
# Peek at the grid point IDs aligned element-wise with flattened_values.
grid_ids

array([    1,     1,     2, ..., 15599, 15600, 15600])

<IPython.core.display.Javascript object>

In [58]:
# Display the first X entry again (same expression as the earlier inspection cell).
dat_X_lead_all_denorm[0][0]

<IPython.core.display.Javascript object>

In [61]:
# Spot-check a single element by integer position.
# NOTE(review): lat=119 / lon=129 are presumably the last positions of a
# 120 x 130 grid (the grid IDs above top out at 15600 = 120 * 130);
# TODO confirm forecast_date=1428 and mean_std=1 are likewise the last indices.
dat_X_lead_all_denorm[0][0].isel(forecast_date=1428, lat=119, lon=129, mean_std=1)

<IPython.core.display.Javascript object>

In [45]:
def build_emb_model(
    n_features,
    n_outputs,
    hidden_nodes,
    emb_size,
    max_id,
    compile=False,
    optimizer="adam",
    lr=0.01,
    loss=crps_cost_function,
    activation="relu",
    reg=None,
):
    """
    Build a dense network that combines numeric features with an ID embedding.

    The model has two inputs: a feature vector of length ``n_features`` and a
    single integer ID. The ID is mapped to an ``emb_size``-dimensional
    embedding, concatenated with the features, passed through the hidden dense
    layers and finally through a linear output layer.

    Args:
        n_features: Number of features
        n_outputs: Number of outputs
        hidden_nodes: int, or list/tuple of ints, with the number of nodes per
            hidden layer
        emb_size: Embedding size
        max_id: Max embedding ID (the embedding table gets ``max_id + 1`` rows)
        compile: If true, compile model
        optimizer: Name of optimizer (e.g. "adam"), resolved through
            ``keras.optimizers.get``; an optimizer instance is also accepted,
            in which case ``lr`` is ignored
        lr: learning rate
        loss: loss function
        activation: Activation function for hidden layer
        reg: Kernel regularizer applied to every dense layer (None disables it)

    Returns:
        model: Keras model
    """
    # Accept a single int as well as any list/tuple of layer sizes
    if not isinstance(hidden_nodes, (list, tuple)):
        hidden_nodes = [hidden_nodes]

    features_in = Input(shape=(n_features,))
    id_in = Input(shape=(1,))
    # max_id + 1 rows so that every ID in 0..max_id is a valid lookup index
    emb = Embedding(max_id + 1, emb_size)(id_in)
    # The embedding output carries a length-1 sequence axis; drop it
    emb = Flatten()(emb)
    x = Concatenate()([features_in, emb])
    for h in hidden_nodes:
        x = Dense(h, activation=activation, kernel_regularizer=reg)(x)
    x = Dense(n_outputs, activation="linear", kernel_regularizer=reg)(x)
    model = Model(inputs=[features_in, id_in], outputs=x)

    if compile:
        if isinstance(optimizer, str):
            # Resolve the optimizer through the public Keras API rather than the
            # module's __dict__, and use `learning_rate` (the `lr` keyword is
            # deprecated/removed in current Keras releases)
            opt = keras.optimizers.get(
                {"class_name": optimizer, "config": {"learning_rate": lr}}
            )
        else:
            # Already an optimizer instance (or config dict): use it as given
            opt = keras.optimizers.get(optimizer)
        model.compile(optimizer=opt, loss=loss)
    return model

<IPython.core.display.Javascript object>