In [21]:
# imports
%load_ext autoreload 
%autoreload 2

import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from hydroml.utils import helpers as h
from hydroml.training.finetune import run_finetune_from_timeseries
from hydroml.workflow.prediction import run_hydrological_simulation
from hydroml.evaluation.metrics import Metrics   
from hydroml.config.config import load_config

def get_metrics(ds):
    """Return all metrics for one simulation result as a flat DataFrame.

    A ``Metrics`` object is built from ``ds['y']`` and ``ds['prediction']``.
    The result of ``all_metrics()`` is converted to a DataFrame, its index is
    moved into regular columns, and the 'catchment_id', 'lead_time' and
    'feature' columns are dropped so only the metric columns remain.
    """
    all_metrics = Metrics(ds['y'], ds['prediction']).all_metrics()
    metrics_frame = all_metrics.to_dataframe().reset_index()
    identifier_columns = ['catchment_id', 'lead_time', 'feature']
    return metrics_frame.drop(columns=identifier_columns)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
# The catchment used throughout this example.
catchment_id = '401208'

# This example needs an already trained model.
model_path = Path('../sample_data/model/version_0')

# The transform parameter path is made absolute so that every finetuned model
# refers to the same parameter file instead of calculating a new one for each
# catchment.
transform_parameter_path = model_path.joinpath('params.yaml').absolute()

# Dynamic inputs: CSV whose first column is used as the index, parsed as dates.
dynamic_data = pd.read_csv(
    f'../sample_data/{catchment_id}.csv',
    index_col=0,
    parse_dates=True,
)

# Static catchment attributes, read from JSON.
static_data = h.read_json(f'../sample_data/{catchment_id}_attributes.json')


# Split the data into calibration and validation periods
We extract the calibration and validation periods from the config file and use them to split our data.
This ensures we use the same periods that were used during model training.


In [23]:
# `load_config` is imported once in the imports cell at the top of the notebook,
# so it is not imported again here.

# Read the calibration and validation periods stored with the trained model.
config = load_config(model_path / 'config.yaml')
cal_periods = config.cal['periods']
val_periods = config.val['periods']

# Each period is unpacked as a (start, end) pair. Every period is sliced out of
# the full series by label and the pieces are concatenated in the listed order.
cal_dynamic_data = pd.concat([dynamic_data.loc[s:e] for s, e in cal_periods])
val_dynamic_data = pd.concat([dynamic_data.loc[s:e] for s, e in val_periods])

# Run the simulation
For benchmarking we run the original/pretrained model first.

We can easily adjust the config parameters for the simulation by passing them as kwargs to the `run_hydrological_simulation` function. Here we need to change the device to `cpu` and pass `transform_parameter_path`, so that the simulation uses the same transform parameters as the finetuned model.

In [24]:
# Config overrides shared by every simulation run in this notebook: the same
# transform parameter file for all models, and CPU execution.
kwargs = dict(
    transform_parameter_path=transform_parameter_path,
    device='cpu',
)

In [25]:
# Benchmark: run the original (pretrained) model on the validation data.
simulation_using_original_model = run_hydrological_simulation(
    model_path,
    val_dynamic_data,
    static_data,
    catchment_id,
    **kwargs,
)

Transforming data: loading transform parameters from p:\work\sho108\hydroml\examples\notebooks\..\sample_data\model\version_0\params.yaml




Now we finetune the model for the catchment we are interested in using the calibration data. Then we can run the simulation using the finetuned model for the validation data.

In [26]:
# The batch size may need adjusting so that the model fits in memory; use
# device='cpu' when no GPU is available.
# When no layers_to_finetune is provided, all parameters in the model are tuned.
finetune_parent, finetune_subdir = run_finetune_from_timeseries(
    model_path,
    cal_dynamic_data,
    static_data,
    catchment_id,
    device='cpu',
    batch_size=128,
)
finetuned_model_path = Path(finetune_parent) / finetune_subdir

# Run the finetuned model on the same validation data as the original model.
simulation_using_finetuned_model = run_hydrological_simulation(
    finetuned_model_path,
    val_dynamic_data,
    static_data,
    catchment_id,
    **kwargs,
)

# One metrics frame per model, stacked and transposed so that each model
# becomes a column.
metrics = pd.concat([
    get_metrics(simulation_using_original_model),
    get_metrics(simulation_using_finetuned_model),
]).T
metrics.columns = ['original', 'finetuned']

metrics

params.yaml
Transforming data: loading transform parameters from ..\sample_data\model\version_0\params.yaml


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\sho108\AppData\Local\pypoetry\Cache\virtualenvs\hydroml-dFLAodHf-py3.11\Lib\site-packages\pytorch_lightning\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
c:\Users\sho108\AppData\Local\pypoetry\Cache\virtualenvs\hydroml-dFLAodHf-py3.11\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory \\fs1-cbr.nexus.csiro.au\{ev-ca-macq}\work\sho108\hydroml\examples\sample_data\model\version_0\finetune_all\401208\241217111535_67d6 exists and is not empty.

  | Name              | Type       | Params | Mode 
---------------------------------------------------------
0 | static_embedding  | Linear     | 15     | train
1 | dynamic_embedding | Linear     | 6      | train
2 | lstm              | LSTM       | 266 K  | train
3 | dropout           | Identity   | 0      | train
4 

..\sample_data\model\version_0\finetune_all\401208\241217111535_67d6


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=15` reached.


Transforming data: loading transform parameters from p:\work\sho108\hydroml\examples\notebooks\..\sample_data\model\version_0\params.yaml




Unnamed: 0,original,finetuned
nse,0.662218,0.890566
kge,0.60203,0.829118
rmse,0.541452,0.30819
bias,1.320835,0.866343
relative_bias,0.320835,-0.133657
absolute_bias,1.320835,0.866343
nse_sqrt,0.7203,0.785461


In [27]:
# The batch size may need adjusting so that the model fits in memory; use
# device='cpu' when no GPU is available.
# This tunes only the parameters in the layers listed in layers_to_finetune.
# NOTE(review): the saved output of this cell reports `max_epochs=15` reached
# although max_epochs=20 is passed here -- confirm the override is honoured
# (or re-run the cell to refresh a stale output).
partial_parent, partial_subdir = run_finetune_from_timeseries(
    model_path,
    cal_dynamic_data,
    static_data,
    catchment_id,
    device='cpu',
    batch_size=128,
    max_epochs=20,
    layers_to_finetune=['head', 'dynamic_embedding'],
)
partial_finetuned_model_path = Path(partial_parent) / partial_subdir

# Run the partially finetuned model on the validation data.
simulation_using_partial_finetuned_model = run_hydrological_simulation(
    partial_finetuned_model_path,
    val_dynamic_data,
    static_data,
    catchment_id,
    **kwargs,
)





params.yaml
Transforming data: loading transform parameters from ..\sample_data\model\version_0\params.yaml


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\sho108\AppData\Local\pypoetry\Cache\virtualenvs\hydroml-dFLAodHf-py3.11\Lib\site-packages\pytorch_lightning\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
c:\Users\sho108\AppData\Local\pypoetry\Cache\virtualenvs\hydroml-dFLAodHf-py3.11\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory \\fs1-cbr.nexus.csiro.au\{ev-ca-macq}\work\sho108\hydroml\examples\sample_data\model\version_0\finetune_head_dynamic_embedding\401208\241217112210_12b1 exists and is not empty.

  | Name              | Type       | Params | Mode 
---------------------------------------------------------
0 | static_embedding  | Linear     | 15     | train
1 | dynamic_embedding | Linear     | 6      | train
2 | lstm              | LSTM       | 266 K  | train
3 | dropout           | Identity   

..\sample_data\model\version_0\finetune_head_dynamic_embedding\401208\241217112210_12b1


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=15` reached.


Transforming data: loading transform parameters from p:\work\sho108\hydroml\examples\notebooks\..\sample_data\model\version_0\params.yaml




# Calculate the metrics

To compare the performance of the different models we calculate the metrics for each model.

In [28]:
# One metrics frame per model, stacked and transposed so that each model
# becomes a column of the comparison table.
metrics = pd.concat([
    get_metrics(simulation_using_original_model),
    get_metrics(simulation_using_finetuned_model),
    get_metrics(simulation_using_partial_finetuned_model),
]).T
metrics.columns = ['original', 'finetuned', 'partial_finetuned']

metrics

Unnamed: 0,original,finetuned,partial_finetuned
nse,0.662218,0.890566,0.82796
kge,0.60203,0.829118,0.888905
rmse,0.541452,0.30819,0.386417
bias,1.320835,0.866343,1.054681
relative_bias,0.320835,-0.133657,0.054681
absolute_bias,1.320835,0.866343,1.054681
nse_sqrt,0.7203,0.785461,0.839433
