# LSTM ResOps-ES: training & testing
***

***Author:** Chus Casado Rodríguez*<br>
***Date:** 16-04-2024*<br>

**Introduction:**<br>
This _notebook_ trains and tests an LSTM model to reproduce reservoir storage.

**To do**:

In [1]:
import sys
sys.path.append('../')
# import pickle
import pandas as pd
from pathlib import Path
import torch
from tqdm.notebook import tqdm

from neuralhydrology.utils.config import Config
from neuralhydrology.evaluation import metrics
from neuralhydrology.nh_run import start_run, eval_run

from model_utils import *

ModuleNotFoundError: No module named 'model_utils'

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# %cd /content/drive/MyDrive/TFM/notebook/models/
# from model_utils import *
# %cd ./CAMELS-ES/

In [30]:
import os

In [33]:
# show the current working directory (relative paths such as 'config.yml' are resolved against it)
os.getcwd()

'/home/casadje/neuralhydrology/models/ResOpsES'

### Configuration

In [4]:
# load the configuration file
config_file = Path('config.yml')
cfg = Config(config_file)

# first target variable declared in the configuration
target = cfg.target_variables[0]

# print the experiment name followed by a summary of the main settings;
# every entry is (label, attribute of `cfg`, separator between label and value)
print(cfg.experiment_name)
summary = [
    ('epochs:', 'epochs', '\t\t'),
    ('hidden size:', 'hidden_size', '\t'),
    ('batch size:', 'batch_size', '\t'),
    ('dropout:', 'output_dropout', '\t'),
    ('clip gradients:', 'clip_gradient_norm', '\t'),
]
for label, attribute, separator in summary:
    print(label, getattr(cfg, attribute), sep=separator)

reservoir_lstm
epochs:		10
hidden size:	64
batch size:	128
dropout:	0.3
clip gradients:	1


In [5]:
# simulate (evaluate) every epoch of the training? If False, only `best_epoch` is evaluated
run_all = True

### Training

In [6]:
# train the model: force the CPU (gpu=-1) when CUDA is not available,
# otherwise call `start_run` without the `gpu` argument
if not torch.cuda.is_available():
    print('Running training on the CPU')
    start_run(config_file=config_file, gpu=-1)
else:
    print('CUDA available')
    start_run(config_file=config_file)

CUDA available
2024-04-17 14:37:03,171: Logging to /home/casadje/neuralhydrology/models/ResOpsES/runs/reservoir_lstm_1704_143703/output.log initialized.
2024-04-17 14:37:03,172: ### Folder structure created at /home/casadje/neuralhydrology/models/ResOpsES/runs/reservoir_lstm_1704_143703
2024-04-17 14:37:03,173: ### Run configurations for reservoir_lstm
2024-04-17 14:37:03,174: experiment_name: reservoir_lstm
2024-04-17 14:37:03,175: run_dir: /home/casadje/neuralhydrology/models/ResOpsES/runs/reservoir_lstm_1704_143703
2024-04-17 14:37:03,176: train_basin_file: data/sample_train.txt
2024-04-17 14:37:03,176: validation_basin_file: data/sample_validation.txt
2024-04-17 14:37:03,177: test_basin_file: data/sample_test.txt
2024-04-17 14:37:03,178: per_basin_train_periods_file: data/periods_train.pkl
2024-04-17 14:37:03,179: per_basin_validation_periods_file: data/periods_validation.pkl
2024-04-17 14:37:03,180: per_basin_test_periods_file: data/periods_test.pkl
2024-04-17 14:37:03,180: seed: 

### Simulation

In [25]:
# find the directory of the last run of this experiment
root_run_dir = cfg.run_dir if cfg.run_dir else Path('./runs/')
experiment_runs = [
    folder for folder in root_run_dir.iterdir()
    if folder.is_dir() and folder.stem.startswith(cfg.experiment_name)
]
if not experiment_runs:
    # explicit message instead of the opaque "max() arg is an empty sequence"
    raise FileNotFoundError(f'No run of experiment "{cfg.experiment_name}" was found in {root_run_dir}')
# NOTE(review): the "last" run is the folder whose name sorts highest; the names seem to end in
# <day><month>_<time> (e.g. reservoir_lstm_1704_143703), so the order may not be chronological
# across months -- confirm
run_dir = max(experiment_runs)

print(run_dir)

runs/reservoir_lstm_1704_143703


In [26]:
# # extraer el rendimiento de cada época y muestra
# metrics = pd.DataFrame(columns=range(1, cfg.epochs + 1))
# for epoch in tqdm(metrics.columns):
#     try:
#         _, metrics[epoch] = get_results(run_dir, 'validation', epoch=epoch)
#     except:
#         continue

# best_epoch = metrics.median().idxmax()
# print('mejor época: {0}\t\tKGE = {1:.3f}'.format(best_epoch, metrics.median().max()))

In [27]:
# device used in the evaluation: the first GPU if CUDA is available, otherwise the CPU (gpu=-1)
gpu = 0 if torch.cuda.is_available() else -1

# epochs to evaluate: all of them, or only the best one
if run_all:
    # built-in `range` instead of `np.arange`: NumPy is not imported explicitly in this notebook
    epochs_to_evaluate = range(1, cfg.epochs + 1)
else:
    # NOTE(review): at this point `best_epoch` is only defined in a commented-out cell;
    # it must be an integer epoch number defined before running this cell with run_all=False
    epochs_to_evaluate = [best_epoch]

# simulate the train and test samples with the weights of every selected epoch
for period in ['train', 'test']:
    for epoch in epochs_to_evaluate:
        eval_run(run_dir=run_dir, period=period, epoch=epoch, gpu=gpu)

2024-04-17 15:06:20,612: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch001.pt
# Evaluation: 100% 174/174 [00:58<00:00,  2.97it/s]
2024-04-17 15:07:19,143: Stored metrics at runs/reservoir_lstm_1704_143703/train/model_epoch001/train_metrics.csv
2024-04-17 15:07:19,398: Stored results at runs/reservoir_lstm_1704_143703/train/model_epoch001/train_results.p
2024-04-17 15:07:19,424: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch002.pt
# Evaluation: 100% 174/174 [00:55<00:00,  3.15it/s]
2024-04-17 15:08:14,656: Stored metrics at runs/reservoir_lstm_1704_143703/train/model_epoch002/train_metrics.csv
2024-04-17 15:08:14,855: Stored results at runs/reservoir_lstm_1704_143703/train/model_epoch002/train_results.p
2024-04-17 15:08:14,878: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch003.pt
# Evaluation: 100% 174/174 [00:55<00:00,  3.12it/s]
2024-04-17 15:09:10,696: Stored metrics at runs/reservoir_lstm_1704_143703/train

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:13<00:00,  4.27it/s]
2024-04-17 15:15:46,943: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch001/test_metrics.csv
2024-04-17 15:15:46,982: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch001/test_results.p
2024-04-17 15:15:47,027: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch002.pt
# Evaluation:  74% 43/58 [00:07<00:03,  4.80it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:10<00:00,  5.43it/s]
2024-04-17 15:15:57,727: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch002/test_metrics.csv
2024-04-17 15:15:57,752: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch002/test_results.p
2024-04-17 15:15:57,769: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch003.pt
# Evaluation:  74% 43/58 [00:07<00:04,  3.65it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:09<00:00,  6.03it/s]
2024-04-17 15:16:07,408: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch003/test_metrics.csv
2024-04-17 15:16:07,431: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch003/test_results.p
2024-04-17 15:16:07,449: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch004.pt
# Evaluation:  76% 44/58 [00:06<00:03,  4.50it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:08<00:00,  6.78it/s]
2024-04-17 15:16:16,016: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch004/test_metrics.csv
2024-04-17 15:16:16,038: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch004/test_results.p
2024-04-17 15:16:16,056: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch005.pt
# Evaluation:  76% 44/58 [00:06<00:03,  4.41it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:08<00:00,  6.96it/s]
2024-04-17 15:16:24,406: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch005/test_metrics.csv
2024-04-17 15:16:24,432: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch005/test_results.p
2024-04-17 15:16:24,454: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch006.pt
# Evaluation:  76% 44/58 [00:06<00:02,  5.51it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:08<00:00,  6.60it/s]
2024-04-17 15:16:33,259: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch006/test_metrics.csv
2024-04-17 15:16:33,279: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch006/test_results.p
2024-04-17 15:16:33,295: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch007.pt
# Evaluation:  76% 44/58 [00:06<00:03,  4.25it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:08<00:00,  6.69it/s]
2024-04-17 15:16:41,989: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch007/test_metrics.csv
2024-04-17 15:16:42,050: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch007/test_results.p
2024-04-17 15:16:42,098: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch008.pt
# Evaluation:  74% 43/58 [00:07<00:03,  3.99it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:10<00:00,  5.77it/s]
2024-04-17 15:16:52,161: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch008/test_metrics.csv
2024-04-17 15:16:52,184: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch008/test_results.p
2024-04-17 15:16:52,215: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch009.pt
# Evaluation:  76% 44/58 [00:06<00:02,  4.91it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:08<00:00,  6.66it/s]
2024-04-17 15:17:00,937: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch009/test_metrics.csv
2024-04-17 15:17:00,960: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch009/test_results.p
2024-04-17 15:17:00,976: Using the model weights from runs/reservoir_lstm_1704_143703/model_epoch010.pt
# Evaluation:  76% 44/58 [00:08<00:03,  4.63it/s]

  r, _ = stats.pearsonr(obs.values, sim.values)


# Evaluation: 100% 58/58 [00:10<00:00,  5.66it/s]
2024-04-17 15:17:11,242: Stored metrics at runs/reservoir_lstm_1704_143703/test/model_epoch010/test_metrics.csv
2024-04-17 15:17:11,284: Stored results at runs/reservoir_lstm_1704_143703/test/model_epoch010/test_results.p


## Results

### Evolution of training and validation

In [12]:
# extract the performance of every epoch and sample (train, validation, test)
# NOTE(review): this dictionary shadows the `metrics` module imported from neuralhydrology.evaluation;
# the name is kept because the following cells use it
metrics = {period: pd.DataFrame(columns=range(1, cfg.epochs + 1)) for period in ['train', 'validation', 'test']}
skipped = []
for period, df in tqdm(metrics.items(), desc='period'):
    for epoch in df.columns:
        try:
            _, df[epoch] = get_results(run_dir, period, epoch=epoch)
        except Exception as error:
            # best effort: an epoch whose results cannot be loaded keeps an empty column, but the
            # failure is recorded instead of silently swallowed (and Ctrl-C is no longer caught)
            skipped.append((period, epoch, type(error).__name__))

if skipped:
    print(f'results not loaded for {len(skipped)} (period, epoch) combinations:', skipped)

period:   0%|          | 0/3 [00:00<?, ?it/s]

In [14]:
# performance of every basin (rows) in the validation sample for each epoch (columns)
metrics['validation']

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
basin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
725,0.449441,0.561991,,,0.483910,0.592641,,0.568898,0.551763,
2338,0.461411,0.402606,0.587517,,,0.506276,0.538376,,,
1565,0.218295,0.268661,0.169933,,0.246504,0.286961,,,,
829,-0.235245,-0.147866,,,,,,-0.418069,,
2067,0.123223,0.319074,0.287307,,,,,0.250332,0.270311,0.307424
...,...,...,...,...,...,...,...,...,...,...
1078,0.186970,,0.426554,0.458636,0.361766,0.422083,0.308922,0.382126,,0.275326
441,0.178463,,,,,0.376979,0.418456,,,0.473727
2346,0.669235,0.671023,0.434089,0.564330,0.631388,,,0.747676,,0.682182
2076,-0.083248,,0.011260,,,-0.051651,-0.146376,-0.083271,,0.065376


In [None]:
# boxplot of the test performance of each epoch; NaN are removed from every epoch because
# matplotlib cannot compute the box statistics of data that contains NaN (the box is left empty)
test_metrics = metrics['test']
bp = plt.boxplot([test_metrics[epoch].dropna().to_numpy(dtype=float) for epoch in test_metrics.columns]);

In [None]:
# evolution of the performance over the epochs: one box per epoch and sample
fig, ax = plt.subplots(figsize=(20, 5))

colors = ['steelblue', 'indianred', 'khaki']
legend_handles, legend_labels = [], []
for i, ((period, df), c) in enumerate(zip(metrics.items(), colors)):
    # NaN are removed from every epoch: matplotlib leaves the box empty if the data contains NaN
    data = [df[epoch].dropna().to_numpy(dtype=float) for epoch in df.columns]
    # the boxes of each sample are shifted 0.25 so that the samples do not overlap
    bp = ax.boxplot(data, positions=[epoch + i * .25 for epoch in df.columns], widths=0.25,
                    patch_artist=True,
                    boxprops={'color': c, 'alpha': .5},
                    showfliers=False,
                    medianprops={'color': 'k'})
    for box in bp['boxes']:
        box.set_facecolor(c)
    legend_handles.append(bp['boxes'][0])
    legend_labels.append(period)

# one tick per epoch; `cfg.epochs` replaces `epochs`, a name not defined in any visible cell
xticks = list(range(1, cfg.epochs + 1))
ax.set_xticks(xticks)
ax.set_xticklabels(xticks)
# NOTE(review): the label previously used `metric`, a name not defined in any visible cell; 'KGE' is
# the metric named in the best-epoch summary and in the file name below -- confirm
ax.set(xlabel='epoch (-)',
       ylabel='KGE (-)')
ax.legend(legend_handles, legend_labels, frameon=False);

# plt.savefig(run_dir / 'evolucion_KGE.jpg', dpi=300, bbox_inches='tight')

### Best epoch

In [None]:
# best epoch of every sample (the one with the highest median performance) and the
# median performance of the three samples at that epoch
best_epoch = {}
for period, df in metrics.items():
    epoch = df.median().idxmax()
    best_epoch[period] = epoch
    kge_train, kge_val, kge_test = (metrics[sample][epoch].median() for sample in ['train', 'validation', 'test'])
    print(f'epoch {epoch:>2}:\tKGEtrain = {kge_train:.3f}   KGEval = {kge_val:.3f}   KGEtest = {kge_test:.3f}')
# from here on, `epoch` is the best epoch according to the validation sample
epoch = best_epoch['validation']

In [None]:
# performance and simulated series of the best epoch
# (`epoch` must already hold the selected epoch when this cell runs)
series = {}
rendimiento = pd.DataFrame()
for period in ['train', 'validation', 'test']:
    # import the results
    series_period, rend_period = get_results(run_dir, period, epoch=epoch)
    # name the performance column after the sample so that the three can be joined side by side
    rend_period.columns = [period]
    series[period] = series_period
    rendimiento = pd.concat((rendimiento, rend_period), axis=1)

In [None]:
# hydrographs: the plots of every sample are saved in their own folder inside the run directory
plots_dir = run_dir / 'plots'
for period, results in series.items():
    period_dir = plots_dir / period
    period_dir.mkdir(parents=True, exist_ok=True)
    plot_results(results, period, target, save=period_dir)

In [None]:
# performance of the best epoch: one column per sample (train, validation, test)
rendimiento