In [11]:
import json
import random
import sys
import warnings
from pathlib import Path

import optuna
import pandas as pd

# Notebook purpose: evaluate model performance.

# The `src` package is imported from the parent directory of this notebook,
# so that directory has to be on the import path before the imports below.
PROJECT_ROOT = Path("..").resolve()
sys.path.append(str(PROJECT_ROOT))
from src.readers.geom_reader import load_geodata
from src.readers.hydro_data_reader import find_valid_gauges
from src.utils.logger import setup_logger

# Keep cell output quiet: Optuna logs only WARNING and above, and every
# Python warning is suppressed.
optuna.logging.set_verbosity(optuna.logging.WARNING)
warnings.filterwarnings("ignore")

# Fixed seed for the standard-library RNG, for reproducibility.
SEED = 42
random.seed(SEED)

logger = setup_logger(name="ModelMetrics", log_file="../logs/metrics.log")


In [29]:
e_obs_ws, e_obs_gauges = load_geodata(folder_depth="..")  # paths are resolved relative to the parent directory
logger.info("Finding gauges with valid data...")
# find_valid_gauges returns two collections; judging by the names these are
# gauges with complete vs. partial records -- TODO confirm against the reader.
hydro_files_dir = Path("../data/HydroFiles")
full_gauges, partial_gauges = find_valid_gauges(e_obs_ws, hydro_files_dir)

2025-06-23 17:27:57 | ModelMetrics | INFO     | <module>:2 | Finding gauges with valid data...


In [None]:
# Meteorological forcing datasets whose per-gauge results are compared.
METEO_DATASETS = ["meteo_ru_nc_02", "e_obs", "era5_land", "mswep"]
# Models whose results are loaded. "catboost" is not implemented yet, so it is
# deliberately left out; add it here once its metrics exist on disk.
MODELS = ["gr4j_optuna"]

# metrics[model][meteo_dataset] -> list of one-row DataFrames (index = gauge id),
# one per gauge, ready to be stacked with pd.concat.
metrics = {}
# Paths of metrics files that were expected but not found on disk.
missing_metric_files = []

for model in MODELS:
    metrics[model] = {}
    for meteo_dataset in METEO_DATASETS:
        metrics[model][meteo_dataset] = []
        for gauge_id in full_gauges:
            metrics_path = f"../data/res/{model}/{gauge_id}/{gauge_id}_{meteo_dataset}/metrics.json"
            try:
                # JSON is UTF-8 by specification; do not depend on the locale default.
                with open(metrics_path, encoding="utf-8") as f:
                    gauge_metric = json.load(f)
            except FileNotFoundError:
                # A single missing result must not abort the whole load.
                # Malformed JSON is still allowed to raise so corruption stays loud.
                missing_metric_files.append(metrics_path)
                continue
            metrics[model][meteo_dataset].append(pd.DataFrame(gauge_metric, index=[gauge_id]))

if missing_metric_files:
    # Statistics computed from `metrics` only cover the gauges that were found,
    # so make the gap visible instead of skipping silently.
    logger.warning(
        f"Skipped {len(missing_metric_files)} missing metrics file(s); "
        f"first: {missing_metric_files[0]}"
    )


In [20]:
# Stack the per-gauge one-row frames into one table per forcing dataset
# (rows = gauges, columns = metrics) for the gr4j_optuna results.
gr4j_results = metrics["gr4j_optuna"]
e_obs_metrics, meteo_ru_nc_02_metrics, era5_land_metrics, mswep_metrics = (
    pd.concat(gr4j_results[dataset_name], axis=0)
    for dataset_name in ("e_obs", "meteo_ru_nc_02", "era5_land", "mswep")
)

In [28]:
# Median of each metric column across gauges for the gr4j_optuna runs on meteo_ru_nc_02.
meteo_ru_nc_02_metrics.median()

NSE        0.528453
KGE        0.633659
PBIAS      9.716214
RMSE       0.476011
MAE        0.276182
logNSE     0.401387
R2         0.601733
PFE      -14.226462
dtype: float64

In [27]:
# Median of each metric column across gauges for the gr4j_optuna runs on mswep.
mswep_metrics.median()

NSE       0.537790
KGE       0.648401
PBIAS     5.483117
RMSE      0.461765
MAE       0.262329
logNSE    0.415679
R2        0.662942
PFE      -6.939282
dtype: float64

In [25]:
# Median of each metric column across gauges for the gr4j_optuna runs on era5_land.
era5_land_metrics.median()

NSE        0.556139
KGE        0.648730
PBIAS      3.997765
RMSE       0.447478
MAE        0.269168
logNSE     0.431665
R2         0.642265
PFE      -16.035996
dtype: float64

In [26]:
# Median of each metric column across gauges for the gr4j_optuna runs on e_obs.
e_obs_metrics.median()


NSE        0.122898
KGE        0.343731
PBIAS      2.000565
RMSE       0.585764
MAE        0.338375
logNSE     0.125736
R2         0.404737
PFE      -20.656552
dtype: float64