# Evaluation
***

***Author:** Chus Casado Rodríguez*<br>
***Date:** 16-05-2024*<br>

**Introduction:**<br>

**Por hacer**:

In [1]:
import os
import sys
sys.path.append('../')
# import pickle
import pandas as pd
from pathlib import Path
import torch
from tqdm.notebook import tqdm

from neuralhydrology.utils.config import Config
from neuralhydrology.evaluation import metrics
from neuralhydrology.nh_run import start_run, eval_run

from model_utils import *

In [2]:
# set device type
if torch.cuda.is_available():
    gpu = 0
    print('running on GPU')
    print('no. GPU available:\t{0}'.format(torch.cuda.device_count()))
    !nvidia-smi
else:
    gpu = -1
    print('running on CPU')

running on GPU
no. GPU available:	1
Fri May 17 09:38:57 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.60.13    Driver Version: 525.60.13    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 8000     On   | 00000000:A1:00.0 Off |                  Off |
| 33%   57C    P2   218W / 260W |   3405MiB / 49152MiB |     69%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------

### Configuration

In [3]:
# Experiment sub-folder to evaluate; when falsy, the `all` folder is used instead
USE = None
# NOTE(review): not referenced by any of the visible cells -- presumably used elsewhere; confirm
SINGLE_USE = False
# Name of the target sub-folder inside the experiment folder
TARGET = 'volume'
RUN_ALL = True # True: evaluate a range of epochs; False: evaluate only `best_epoch`
# Configuration file read with `Config` after changing into the experiment folder
CONFIG_FILE = 'config_V_lstm256_fc00_do04.yml'

In [4]:
# Load the configuration file of the experiment.
# The experiment folder depends on `USE`; when it is not set, the `all` folder is used.
exp_dir = Path(f'./{USE}/{TARGET}/') if USE else Path(f'./all/{TARGET}/')

# `exp_dir` is relative, so a plain `os.chdir(exp_dir)` only works the first time the
# cell is executed. Remember the directory in which the notebook started and always
# resolve the experiment folder against it, so the cell can be re-run safely.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = Path.cwd()
os.chdir(NOTEBOOK_DIR / exp_dir)
cfg = Config(Path(CONFIG_FILE))

# first target variable declared in the configuration
target = cfg.target_variables[0]

# summary of the main settings of the experiment
print(cfg.experiment_name)
print('epochs:', cfg.epochs, sep='\t\t')
print('hidden size:', cfg.hidden_size, sep='\t')
print('batch size:', cfg.batch_size, sep='\t')
print('dropout:', cfg.output_dropout, sep='\t')
print('clip gradients:', cfg.clip_gradient_norm, sep='\t')

V_lstm256_fc00_do04
epochs:		20
hidden size:	256
batch size:	128
dropout:	0.4
clip gradients:	1


### Evaluation

In [5]:
# Find the directory of the last run of this experiment.
# Runs are searched in the run directory set in the configuration, or in `./runs/`
# when the configuration does not define one.
root_run_dir = cfg.run_dir or Path('./runs/')
candidate_dirs = [x for x in root_run_dir.iterdir()
                  if x.is_dir() and x.stem.startswith(cfg.experiment_name)]
if not candidate_dirs:
    # fail with an explicit message instead of the opaque error of `max([])`
    raise FileNotFoundError(
        f"no run directory starting with '{cfg.experiment_name}' found in '{root_run_dir}'"
    )
# NOTE(review): paths are compared lexicographically; if the suffix of the run name
# starts with the day of the month, this is not necessarily the most recent run when
# runs from different months coexist -- confirm.
run_dir = max(candidate_dirs)

print('run directory:', run_dir, sep='\t')

run directory:	runs/V_lstm256_fc00_do04_1605_161432


In [6]:
# NOTE: this cell is disabled (every line is commented out). Among the visible cells it
# is the only one placed before the evaluation loop that would define `best_epoch`.
# # extract the performance of every epoch and sample
# metrics = pd.DataFrame(columns=range(1, cfg.epochs + 1))
# for epoch in tqdm(metrics.columns):
#     try:
#         _, metrics[epoch] = get_results(run_dir, 'validation', epoch=epoch)
#     except:
#         continue

# best_epoch = metrics.median().idxmax()
# print('mejor época: {0}\t\tKGE = {1:.3f}'.format(best_epoch, metrics.median().max()))

In [7]:
# Evaluate the run with `eval_run` for the selected periods and epochs.
# NOTE: despite its name, RUN_ALL evaluates the epochs from `first_epoch` up to
# `cfg.epochs`, not every epoch of the run.
first_epoch = 15
for period in ['train']:#, 'test']:
    if RUN_ALL:
        # the built-in `range` yields plain Python integers and does not depend on
        # NumPy being brought into scope by a star import
        epochs = range(first_epoch, cfg.epochs + 1)
    else:
        # no active cell above defines `best_epoch`, so fail with a clear message
        # instead of a bare NameError raised in the middle of the loop
        if 'best_epoch' not in globals():
            raise NameError('`best_epoch` is not defined: set RUN_ALL = True or '
                            'define the epoch to evaluate before running this cell')
        # once the "Best epoch" section has been run, `best_epoch` is a dictionary
        # {period: epoch}; the validation entry is the one selected there
        selected_epoch = best_epoch['validation'] if isinstance(best_epoch, dict) else best_epoch
        epochs = [selected_epoch]
    for epoch in epochs:
        eval_run(run_dir=run_dir, period=period, epoch=epoch, gpu=gpu)

# Progress log of a previous execution:
# Evaluation: 100% 203/203 [05:36<00:00,  1.66s/it]
# Evaluation: 100% 203/203 [05:33<00:00,  1.64s/it]
# Evaluation: 100% 203/203 [05:41<00:00,  1.68s/it]
# Evaluation: 100% 203/203 [05:39<00:00,  1.67s/it]
# Evaluation: 100% 203/203 [05:37<00:00,  1.66s/it]
# Evaluation: 100% 203/203 [05:39<00:00,  1.67s/it]


## Results

### Evolution of training and validation

In [None]:
# Extract the performance of every epoch and period: one DataFrame per period with
# one column per epoch.
# NOTE: the name `metrics` rebinds the `metrics` module imported from
# `neuralhydrology.evaluation`; it is kept because the following cells read this dictionary.
metrics = {period: pd.DataFrame(columns=range(1, cfg.epochs + 1)) for period in ['train', 'validation']} # ['train', 'validation', 'test']}
for period, df in tqdm(metrics.items(), desc='period'):
    for epoch in df.columns:
        try:
            _, df[epoch] = get_results(run_dir, period, epoch=epoch)
        except Exception:
            # best effort: epochs whose results cannot be loaded are skipped and their
            # column stays empty. `Exception` (instead of a bare `except`) keeps
            # KeyboardInterrupt and SystemExit working while the loop runs.
            continue

In [None]:
import matplotlib.patches as mpatches

In [None]:
# Box plots with the distribution of the metric in every epoch, one colour per period.
fig, ax = plt.subplots(figsize=(20, 5))

colors = ['steelblue', 'indianred', 'khaki']
patches = []
for shift, (period, color) in enumerate(zip(metrics, colors)):
    scores = metrics[period]
    # the boxes of each period are shifted sideways so that they sit next to each other
    boxes = ax.boxplot(
        scores,
        positions=scores.columns + shift * .25,
        widths=0.25,
        patch_artist=True,
        showfliers=False,
        # whiskerprops={},
        boxprops=dict(color=color, alpha=.5),
        medianprops=dict(color='k'),
    )['boxes']
    for patch in boxes:
        patch.set_facecolor(color)
    # legend entry of the period
    patches.append(mpatches.Patch(color=color, alpha=.5, label=period))

# ax.set_ylim(-1, 1);
# one tick per epoch
xticks = np.arange(1, cfg.epochs + 1)
ax.set_xticks(xticks)
ax.set_xticklabels(xticks)
ax.set(xlabel='epoch (-)', ylabel=f'{cfg.metrics[0]} (-)')
ax.legend(handles=patches, frameon=False, bbox_to_anchor=[1, .35, .1, .3])

# the figure is stored inside the run directory
plt.savefig(run_dir / 'evolucion_KGE.jpg', dpi=300, bbox_inches='tight')

### Best epoch

In [None]:
# Best epoch of every period (the one with the highest median score) together with the
# median train and validation performance obtained at that epoch.
best_epoch = {}
for period, scores in metrics.items():
    top_epoch = scores.median().idxmax()
    best_epoch[period] = top_epoch
    median_train = metrics['train'][top_epoch].median()
    median_val = metrics['validation'][top_epoch].median()
    print('epoch {0:>2}:\tKGEtrain = {1:.3f}   KGEval = {2:.3f}'.format(top_epoch, median_train, median_val))
# keep the best epoch of the validation period
epoch = best_epoch['validation']

In [None]:
# Performance and simulated series of the best epoch.
series = {}
performance_by_period = []
for period in ['train', 'validation']:
    # load the results of the period
    series_period, rend_period = get_results(run_dir, period, epoch=epoch)
    # label the performance column with the period it belongs to
    rend_period.columns = [period]
    series[period] = series_period
    performance_by_period.append(rend_period)
# join the periods in a single call instead of growing a DataFrame, seeded with an
# empty frame, through repeated `pd.concat` calls inside the loop
rendimiento = pd.concat(performance_by_period, axis=1)

In [None]:
# Hydrographs of the simulated series, period by period.
# if hidrogramas:
for period in series:
    # output folder of the period; it is created even though `save=None` is passed below
    path = run_dir / 'plots' / period
    path.mkdir(parents=True, exist_ok=True)
    plot_results(
        series[period],
        period,
        target=cfg.target_variables[0],
        ylim=(-.05, 1.05),
        ylabel='fraction filled (-)',
        save=None,  # path)
    )