In [1]:
from pathlib import Path
import sys

import geopandas as gpd
from neuralhydrology.evaluation import get_tester
from neuralhydrology.utils.config import Config
import xarray as xr

sys.path.append("../")
from src.readers.geom_reader import load_geodata
from src.utils.logger import setup_logger

LOG = setup_logger("model_comparison", log_file="../logs/fine_tuning.log")

# Watershed polygons and gauge points from the project reader, plus the basemap layer.
ws, gauges = load_geodata(folder_depth="../")
common_index = gauges.index.to_list()
basemap_data = gpd.read_file("../data/geometry/basemap_2023.gpkg")

# Second ("partial") set of watersheds/gauges, indexed by gauge_id.
# set_index is chained instead of inplace=True so re-running the cell always
# starts from the freshly read layer.
partial_ws = gpd.read_file("../data/geometry/partial_ws.gpkg").set_index("gauge_id")
partial_gauges = gpd.read_file("../data/geometry/partial_gauges.gpkg").set_index(
    "gauge_id"
)

# Gauge ids from `gauges` followed by those from `partial_gauges`.
join_index = gauges.index.to_list() + partial_gauges.index.to_list()

# Basin list file: one integer gauge id per line.
# The target folder is created first so the cell also works when
# ../data/models/fine_tune/ does not exist yet.
basin_list_path = Path("../data/models/fine_tune/blind_and_full.txt")
basin_list_path.parent.mkdir(parents=True, exist_ok=True)

with open(basin_list_path, "w") as the_file:
    for gauge_name in join_index:
        the_file.write(f"{int(gauge_name)}\n")

In [None]:
# Rewrite, in place, every NetCDF file in ../data/time_series/ that still carries
# a `gauge_id` index/dimension or an `index` index. Files matching none of the
# three cases are left untouched.
# NOTE(review): presumably this brings the files to the layout expected by the
# model's data loader (a `date` coordinate, no `gauge_id`) — confirm.
# NOTE(review): the branches are mutually exclusive, so a file gets at most one
# fix per run; a file needing two of them has to go through this cell twice.
# The glob result is materialised with list() before any file is rewritten.
for f in list(Path("../data/time_series/").glob("*.nc")):
    with xr.open_dataset(f) as ds:
        if "gauge_id" in ds.indexes:
            # gauge_id is an index: reset it and drop the coordinate
            ds = ds.reset_index("gauge_id", drop=True)
        elif "gauge_id" in ds.dims:
            # gauge_id is only a dimension: squeeze it out and drop it
            ds = ds.squeeze("gauge_id", drop=True)
        elif "index" in ds.indexes:
            # the index is called "index": rename it to "date"
            ds = ds.rename({"index": "date"})
        else:
            # nothing to fix, skip the rewrite for this file
            continue
        ds = ds.load()  # Load data into memory before closing file
    ds.to_netcdf(f)  # Write after the file is closed


In [3]:
LOG.info(f"Loaded {len(gauges)} gauges with hybrid classification")

# For each meteo forcing: the run's config.yml and the epoch to evaluate.
cfg_pathes = {
    "gpcp": dict(
        path=Path(
            "../data/lstm_configs/model_runs/cudalstm_q_mm_day_gpcp_no_autocorr_static_1203_080402/config.yml"
        ),
        epoch=24,
    ),
    "mswep": dict(
        path=Path(
            "../data/lstm_configs/model_runs/cudalstm_q_mm_day_mswep_no_autocorr_static_1103_191754/config.yml"
        ),
        epoch=24,
    ),
    "e5l": dict(
        path=Path(
            "../data/lstm_configs/model_runs/cudalstm_q_mm_day_era5l_no_autocorr_static_1003_133332/config.yml"
        ),
        epoch=26,
    ),
    "e5": dict(
        path=Path(
            "../data/lstm_configs/model_runs/cudalstm_q_mm_day_era5_no_autocorr_static_1203_220232/config.yml"
        ),
        epoch=20,
    ),
}

# The same basin list is used for the train, validation and test basin files.
basin_file = "../data/models/fine_tune/blind_and_full.txt"

# Test-period results per forcing, as returned by tester.evaluate().
model_results = {}

# Dict insertion order gives the same sequence: gpcp, mswep, e5l, e5.
for model, run_spec in cfg_pathes.items():
    LOG.info(f"Evaluating {model}...")
    lstm_cfg = run_spec["path"]
    epoch = run_spec["epoch"]
    cfg_run = Config(lstm_cfg)

    # A fresh override dict is built on every pass rather than shared between runs.
    overrides = {
        "train_basin_file": basin_file,
        "validate_n_random_basins": len(join_index),
        "validation_basin_file": basin_file,
        "test_basin_file": basin_file,
        "test_start_date": "01/01/2009",
        "test_end_date": "31/12/2020",
    }
    cfg_run.update_config(overrides)

    tester = get_tester(
        cfg=cfg_run,
        run_dir=cfg_run.run_dir,
        period="test",
        init_model=True,
    )
    pred_results = tester.evaluate(epoch=epoch, save_results=True)
    model_results[model] = pred_results

[38;5;39m2025-12-29 13:28:42 | INFO     | PhDLogger | model_comparison | ℹ️  Loaded 996 gauges with hybrid classification[0m
[38;5;39m2025-12-29 13:28:42 | INFO     | PhDLogger | model_comparison | ℹ️  Evaluating gpcp...[0m


# Evaluation: 100%|██████████| 1405/1405 [04:25<00:00,  5.28it/s]


[38;5;39m2025-12-29 13:33:08 | INFO     | PhDLogger | model_comparison | ℹ️  Evaluating mswep...[0m


# Evaluation: 100%|██████████| 1405/1405 [04:27<00:00,  5.26it/s]


[38;5;39m2025-12-29 13:37:35 | INFO     | PhDLogger | model_comparison | ℹ️  Evaluating e5l...[0m


# Evaluation: 100%|██████████| 1405/1405 [04:30<00:00,  5.20it/s]


[38;5;39m2025-12-29 13:42:06 | INFO     | PhDLogger | model_comparison | ℹ️  Evaluating e5...[0m


# Evaluation: 100%|██████████| 1405/1405 [04:30<00:00,  5.19it/s]


In [18]:
# Write each model's per-gauge predictions to
# ../data/predictions/lstm/<gauge_id>/<gauge_id>_<meteo_source>_predictions.csv
save_path = Path("../data/predictions/lstm")
save_path.mkdir(parents=True, exist_ok=True)

# The inner dict has its own name. Using `model_results` as the loop target would
# rebind the dict being iterated, leaving only the last source's results behind
# and breaking any re-run of this cell.
for meteo_source, source_results in model_results.items():
    for gauge_id, gauge_result in source_results.items():
        to_disk = (
            gauge_result["1D"]["xr"]
            .to_dataframe()
            .droplevel(1)  # drop the second index level
            .rename(columns={"q_mm_day_obs": "q_obs", "q_mm_day_sim": "q_sim"})
        )
        # Tag every row so files from different gauges/forcings can be concatenated.
        to_disk["gauge_id"] = gauge_id
        to_disk["dataset"] = meteo_source

        # One folder per gauge, holding one CSV per meteo source.
        final_save = save_path / gauge_id
        final_save.mkdir(parents=True, exist_ok=True)

        to_disk.to_csv(final_save / f"{gauge_id}_{meteo_source}_predictions.csv")


# Model performance


In [19]:
# Columns kept from the fine-tuning layer: identifiers, geometry and the
# lstm_nse_* column for each meteo forcing.
ft_columns = [
    "gauge_id",
    "name_ru",
    "name_en",
    "geometry",
    "lstm_nse_mswep",
    "lstm_nse_e5l",
    "lstm_nse_e5",
    "lstm_nse_gpcp",
]
fine_tune_gauges = gpd.read_file("../res/FineTuneGauges.gpkg")[ft_columns].set_index(
    "gauge_id"
)

# Split the gauges into the fine-tuning set and everything else.
ft_index = fine_tune_gauges.index.tolist()
in_fine_tune = gauges.index.isin(ft_index)
rest_gauges = gauges.loc[~in_fine_tune]
rest_index = rest_gauges.index.tolist()
