# Original pipeline

At some point, while fighting with my own pipeline code, I went back and re-implemented the original pipeline so that I could compare its ground-truth results with those of my implementation.

In [None]:
from lib.ClimSim.climsim_utils.data_utils import data_utils
import xarray as xr
import os
import glob

# Hide every CUDA device so TensorFlow runs on CPU only. The variable is set
# before the `import tensorflow` below so it is in place when TensorFlow starts.
# NOTE(review): `data_utils` is imported earlier; if it imports TensorFlow
# itself, confirm the variable is still honoured.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf

2026-02-01 17:27:26.138276: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Grid description of the low-resolution ClimSim dataset (opened with xarray's
# default engine).
grid_info = xr.open_dataset("data/ClimSim_low-res/ClimSim_low-res_grid-info.nc")

# Normalization statistics shipped with the ClimSim repository. The four files
# are opened in the order mean, max, min, output scale, all with h5netcdf.
input_mean, input_max, input_min, output_scale = (
    xr.open_dataset(nc_path, engine="h5netcdf")
    for nc_path in (
        "lib/ClimSim/preprocessing/normalizations/inputs/input_mean.nc",
        "lib/ClimSim/preprocessing/normalizations/inputs/input_max.nc",
        "lib/ClimSim/preprocessing/normalizations/inputs/input_min.nc",
        "lib/ClimSim/preprocessing/normalizations/outputs/output_scale.nc",
    )
)

# Reference ClimSim helper object that drives the original pipeline.
data = data_utils(
    grid_info=grid_info, input_mean=input_mean, input_max=input_max,
    input_min=input_min, output_scale=output_scale,
)

# Select the "v1" variable set and the TensorFlow backend.
# NOTE(review): the exact variable list behind `set_to_v1_vars` lives in
# data_utils and is not visible here.
data.set_to_v1_vars()
data.ml_backend = "tensorflow"

In [10]:
# Point the helper at the training directory and register the input-file
# pattern, the sampling stride (1) and the resulting file list for 'train'.
# NOTE(review): the calls are kept in this order; set_filelist presumably
# relies on data_path / regexps / stride being set first -- confirm in data_utils.
data.data_path = "data/ClimSim_low-res/train/" 
data.set_regexps('train', ["E3SM-MMF.mli.*.nc"])
data.set_stride_sample('train', 1)
data.set_filelist(data_split='train')

print("Nombre de fihciers d'entrainement :", len(data.get_filelist('train')))
# Despite its name, `tf_records` is whatever load_ncdata_with_generator returns;
# later cells call .batch() and .take() on it and iterate (input, target) pairs.
tf_records = data.load_ncdata_with_generator(data_split='train')

Nombre de fihciers d'entrainement : 10890


In [11]:
# Path to the saved MLP baseline weights (.h5) inside the ClimSim checkout.
MLP_PATH = "lib/ClimSim/baseline_models/MLP/model/backup_phase-7_retrained_models_step2_lot-147_trial_0027.best.h5"

# The model is only used for inference (predict) in this notebook, so it is
# loaded with compile=False.
mlp_model = tf.keras.models.load_model(MLP_PATH, compile=False)
# mlp_model.summary()



In [12]:
import numpy as np
import re
import tensorflow as tf

# Input ("features") and output ("target") variable names, grouped the way the
# rest of the notebook looks them up.
FEATURES = {
    "features": {
        "multilevel": [
            "in_state_t",
            "in_state_q0001",
        ],
        "surface": [
            "in_state_ps",
            "in_pbuf_SOLIN",
            "in_pbuf_LHFLX",
            "in_pbuf_SHFLX",
        ],
    },
    "target": {
        "tendancies": [
            "out_ptend_t",
            "out_ptend_q0001",
        ],
        "surface": [
            "out_cam_out_NETSW",
            "out_cam_out_FLWDS",
            "out_cam_out_PRECSC",
            "out_cam_out_PRECC",
            "out_cam_out_SOLS",
            "out_cam_out_SOLL",
            "out_cam_out_SOLSD",
            "out_cam_out_SOLLD",
        ],
    },
}

# All evaluated targets: tendencies first, then the surface outputs.
target_vars = [*FEATURES["target"]["tendancies"], *FEATURES["target"]["surface"]]

# Batch-level MAE scores, one list per target variable. Keys are the variable
# names without their "in_"/"out_" prefix, because that is how the dictionaries
# returned by `data.output_weighting` are indexed below.
# (The zero-filled `stats` accumulators that used to be built here were never
# updated, so they have been dropped.)
all_mae = {re.sub(r'^(in_|out_)', '', var): [] for var in target_vars}

# Evaluation dataset: 10 dataset elements per batch; only the first 5 batches
# are evaluated below as a quick check.
# NOTE(review): each element is presumably one file of shape (384, n_features),
# giving batches of shape (10, 384, n_features) -- confirm against data_utils.
test_ds = tf_records.batch(10)
print("Lancement de l'évaluation...")

for x_batch, y_true_batch in test_ds.take(5):
    # Read the feature widths from the tensors instead of hard-coding 124 / 128,
    # so the cell keeps working if the variable set changes.
    in_dim = int(x_batch.shape[-1])
    out_dim = int(y_true_batch.shape[-1])

    # Collapse every leading dimension into one sample axis.
    x_flat = tf.reshape(x_batch, [-1, in_dim])
    y_flat = tf.reshape(y_true_batch, [-1, out_dim])

    # The helper reads the batch through `data.input_train` when building the
    # pressure grid, which `output_weighting` presumably relies on.
    data.input_train = x_flat
    data.set_pressure_grid('train')

    # Predict on the flattened samples.
    preds_flat = mlp_model.predict(x_flat, verbose=0)

    # Apply the same output weighting to ground truth and predictions.
    true_dict = data.output_weighting(y_flat.numpy(), data_split='train')
    pred_dict = data.output_weighting(preds_flat, data_split='train')

    for var in target_vars:
        # `sname` rather than `short_name`, so that re-running this cell cannot
        # overwrite the `short_name()` helper defined in a later cell.
        sname = re.sub(r'^(out_|in_)', '', var)

        y_t = true_dict[sname]
        y_p = pred_dict[sname]

        # Reference MAE from the ClimSim helper. Per the original author's note
        # it yields one value per level for tendencies and a scalar otherwise.
        mae_score = data.calc_MAE(y_p, y_t, avg_grid=True)
        all_mae[sname].append(mae_score)

# Final report: average over batches first, then over levels.
print(f"\n{'Variable':<25} | {'MAE Moyenne (W/m2)':<15}")
print("-" * 45)

for var, scores in all_mae.items():
    final_score = np.mean(np.mean(scores, axis=0))
    print(f"{var:<25} | {final_score:.4e}")

Lancement de l'évaluation...

Variable                  | MAE Moyenne (W/m2)
---------------------------------------------
ptend_t                   | 2.3972e+00
ptend_q0001               | 4.0709e+00
cam_out_NETSW             | 1.2766e+01
cam_out_FLWDS             | 5.5224e+00
cam_out_PRECSC            | 2.9727e+00
cam_out_PRECC             | 3.0532e+01
cam_out_SOLS              | 7.7381e+00
cam_out_SOLL              | 1.0262e+01
cam_out_SOLSD             | 4.6384e+00
cam_out_SOLLD             | 5.0515e+00


In [17]:
import numpy as np
import re
import tensorflow as tf

# Input ("features") and output ("target") variable names used by the evaluation.
FEATURES = {
    "features": {
        "multilevel": ["in_state_t", "in_state_q0001"],
        "surface": [
            "in_state_ps", "in_pbuf_SOLIN",
            "in_pbuf_LHFLX", "in_pbuf_SHFLX",
        ],
    },
    "target": {
        "tendancies": ["out_ptend_t", "out_ptend_q0001"],
        "surface": [
            "out_cam_out_NETSW", "out_cam_out_FLWDS",
            "out_cam_out_PRECSC", "out_cam_out_PRECC",
            "out_cam_out_SOLS", "out_cam_out_SOLL",
            "out_cam_out_SOLSD", "out_cam_out_SOLLD",
        ],
    },
}

# Flat list of every target: tendencies first, then surface outputs.
target_vars = [
    name
    for group in ("tendancies", "surface")
    for name in FEATURES["target"][group]
]

def short_name(var: str) -> str:
    """Return ``var`` without a leading ``in_`` or ``out_`` prefix.

    Only a prefix at the very start of the name is removed, and only once;
    names that start with neither prefix are returned unchanged.
    """
    prefix_pattern = r'^(in_|out_)'
    return re.sub(prefix_pattern, '', var)

def var_size(var: str) -> int:
    """Return the number of values stored per sample for target ``var``.

    Variables listed under ``FEATURES["target"]["tendancies"]`` get 60 values;
    every other variable gets 1.
    """
    if var in FEATURES["target"]["tendancies"]:
        return 60
    return 1

def to_2d(y: np.ndarray, L: int) -> np.ndarray:
    """Reshape ``y`` to ``(N, L)`` so it can be accumulated along axis 0.

    For ``L == 1`` every element becomes its own row. For any other ``L`` the
    last axis of ``y`` must already have length ``L``; all leading axes are
    merged into ``N``.

    Raises:
        ValueError: if ``L != 1`` and the last dimension of ``y`` is not ``L``.
    """
    y = np.asarray(y)
    # Only multi-valued variables have a last axis that can be validated.
    if L != 1 and y.shape[-1] != L:
        raise ValueError(f"Attendu dernière dim={L}, reçu shape {y.shape}")
    return y.reshape(-1, L)

# Minimum total sum of squares for an R² value to be reported as valid;
# levels at or below this absolute threshold are reported as NaN.
SS_TOT_EPS = 1e-10

# Dataset-level accumulators, one entry per target keyed by its short name.
# Every array holds one slot per value of the variable (see var_size).
stats = {
    short_name(var): {
        "sum_abs_err": np.zeros(var_size(var), dtype=np.float64),
        "ss_res":      np.zeros(var_size(var), dtype=np.float64),
        "sum_y":       np.zeros(var_size(var), dtype=np.float64),
        "sum_y_sq":    np.zeros(var_size(var), dtype=np.float64),
        "count":       0,
    }
    for var in target_vars
}

# Per-batch scores: MAE from the ClimSim helper, and R² computed per batch.
all_mae = {key: [] for key in stats}
all_r2_batch = {key: [] for key in stats}

print("Lancement de l'évaluation...")

# 100 dataset elements per batch.
# NOTE(review): presumably batches of shape (100, 384, n_features) -- confirm.
test_ds = tf_records.batch(100)

# ---------------------------
# Boucle
# ---------------------------
for x_batch, y_true_batch in test_ds.take(10):

    # Feature widths are read from the tensors rather than hard-coded.
    in_dim  = int(x_batch.shape[-1])
    out_dim = int(y_true_batch.shape[-1])

    # Collapse every leading dimension into one sample axis.
    x_flat = tf.reshape(x_batch, [-1, in_dim])
    y_flat = tf.reshape(y_true_batch, [-1, out_dim])

    # The helper reads the batch through `data.input_train` when building the
    # pressure grid, which `output_weighting` presumably relies on.
    data.input_train = x_flat
    data.set_pressure_grid('train')

    preds_flat = mlp_model.predict(x_flat, verbose=0)

    # Weighted outputs, as dictionaries keyed by short variable name.
    true_dict = data.output_weighting(y_flat.numpy(), data_split='train')
    pred_dict = data.output_weighting(preds_flat,      data_split='train')

    for var in target_vars:
        sname = short_name(var)
        L = var_size(var)

        y_t = true_dict[sname]
        y_p = pred_dict[sname]

        # 1) Batch-level MAE from the ClimSim helper, unchanged from the earlier cell.
        mae_score = data.calc_MAE(y_p, y_t, avg_grid=True)
        all_mae[sname].append(np.asarray(mae_score))

        # 2) Dataset-level accumulation (MAE + R²).
        # Promote to float64 before reducing: the global SS_tot is later formed
        # as sum(y²) - N·mean², which amplifies any rounding error made while
        # summing (the arrays may be float32). No copy is made if they are
        # already float64.
        y_t2 = to_2d(y_t, L).astype(np.float64, copy=False)  # (N, L)
        y_p2 = to_2d(y_p, L).astype(np.float64, copy=False)  # (N, L)
        if y_t2.shape != y_p2.shape:
            # Without this check a mismatch could broadcast silently.
            raise ValueError(
                f"Shape mismatch for {sname}: truth {y_t2.shape} vs prediction {y_p2.shape}"
            )
        err = y_t2 - y_p2
        N = y_t2.shape[0]

        # Residual sum of squares, computed once and reused for the batch R².
        ss_res_b = np.sum(err ** 2, axis=0)

        acc = stats[sname]
        acc["sum_abs_err"] += np.sum(np.abs(err), axis=0)
        acc["ss_res"]      += ss_res_b
        acc["sum_y"]       += np.sum(y_t2, axis=0)
        acc["sum_y_sq"]    += np.sum(y_t2 ** 2, axis=0)
        acc["count"]       += N

        # 3) Batch-level R², using the same formula and validity threshold as
        # the global score. Levels with (near-)zero variance stay NaN.
        y_mean_b = np.mean(y_t2, axis=0)
        ss_tot_b = np.sum((y_t2 - y_mean_b) ** 2, axis=0)

        r2_b = np.full(L, np.nan, dtype=np.float64)
        mask_b = ss_tot_b > SS_TOT_EPS
        r2_b[mask_b] = 1.0 - (ss_res_b[mask_b] / ss_tot_b[mask_b])

        all_r2_batch[sname].append(r2_b)

# ---------------------------
# Affichages
# ---------------------------


# B) Scores globaux (MAE global + R² global robuste)
# Dataset-level report: one row per target with its global MAE, its global R²
# (mean over valid levels) and the share of levels whose R² is valid.
print(f"\n=== SCORES GLOBAUX (dataset-level) ===")
print(f"{'Variable':<25} | {'MAE global':>12} | {'R2 global':>12} | {'% R2 valides':>12}")
print("-" * 70)

for sname in stats:
    s = stats[sname]
    N = s["count"]

    if N != 0:
        # Mean absolute error per level over all accumulated samples.
        mae_per_level = s["sum_abs_err"] / N

        # Total sum of squares rebuilt from the running sums:
        # SS_tot = sum(y^2) - N * mean(y)^2.
        y_mean = s["sum_y"] / N
        ss_tot = s["sum_y_sq"] - N * (y_mean ** 2)
        ss_res = s["ss_res"]

        # R² is only defined where the variance exceeds the threshold;
        # the remaining levels stay NaN and are ignored by nanmean.
        mask = ss_tot > SS_TOT_EPS
        r2_per_level = np.full(ss_tot.shape, np.nan, dtype=np.float64)
        r2_per_level[mask] = 1.0 - (ss_res[mask] / ss_tot[mask])

        mae_global = float(np.nanmean(mae_per_level))
        r2_global  = float(np.nanmean(r2_per_level))
        pct_valid  = 100.0 * float(np.mean(mask))

        print(f"{sname:<25} | {mae_global:12.4e} | {r2_global:12.4e} | {pct_valid:11.1f}%")
    else:
        # Nothing was accumulated for this variable.
        print(f"{sname:<25} | {'NA':>12} | {'NA':>12} | {'NA':>12}")


Lancement de l'évaluation...


2026-02-01 17:41:19.864041: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 38092800 exceeds 10% of free system memory.
2026-02-01 17:41:19.864080: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 39321600 exceeds 10% of free system memory.
2026-02-01 17:41:25.148413: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 19660800 exceeds 10% of free system memory.
2026-02-01 17:41:40.312767: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 38092800 exceeds 10% of free system memory.
2026-02-01 17:41:40.312799: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 39321600 exceeds 10% of free system memory.



=== SCORES GLOBAUX (dataset-level) ===
Variable                  |   MAE global |    R2 global | % R2 valides
----------------------------------------------------------------------
ptend_t                   |   2.4911e+00 |   6.5439e-01 |       100.0%
ptend_q0001               |   4.2012e+00 |  -2.3460e+03 |        80.0%
cam_out_NETSW             |   1.2781e+01 |   9.8903e-01 |       100.0%
cam_out_FLWDS             |   5.4429e+00 |   9.9198e-01 |       100.0%
cam_out_PRECSC            |   2.8079e+00 |   8.6181e-01 |       100.0%
cam_out_PRECC             |   3.2678e+01 |   8.2283e-01 |       100.0%
cam_out_SOLS              |   7.6387e+00 |   9.7596e-01 |       100.0%
cam_out_SOLL              |   1.0139e+01 |   9.6466e-01 |       100.0%
cam_out_SOLSD             |   4.5480e+00 |   9.6528e-01 |       100.0%
cam_out_SOLLD             |   4.9300e+00 |   9.0005e-01 |       100.0%


2026-02-01 17:44:58.291840: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
