# Notebook for Running Model Training

## Import modules

In [2]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import torch
from neuralhydrology.evaluation import metrics
from neuralhydrology.nh_run import start_run, eval_run
from neuralhydrology.nh_run_scheduler import schedule_runs

## Training Model

1. Base Model = LSTM without dropout
2. MCD Model = LSTM with a regression head, Monte Carlo Dropout, and a dropout rate
3. UMAL Model = LSTM with a UMAL head and a dropout rate

### Base Model

In [2]:
# Train the base model. When a CUDA-capable GPU is available no device override
# is passed; otherwise gpu=-1 is supplied so the run falls back to CPU-only mode.
device_kwargs = {} if torch.cuda.is_available() else {"gpu": -1}
start_run(config_file=Path("base_model_cudalstm.yml"), **device_kwargs)

2024-01-24 16:39:16,124: Logging to C:\Users\Dwiva5\Downloads\DSAI\Project\hbv-runoff\Project-Run\runs\run_cudalstm_2401_163916\output.log initialized.
2024-01-24 16:39:16,124: ### Folder structure created at C:\Users\Dwiva5\Downloads\DSAI\Project\hbv-runoff\Project-Run\runs\run_cudalstm_2401_163916
2024-01-24 16:39:16,124: ### Run configurations for run_cudalstm
2024-01-24 16:39:16,125: experiment_name: run_cudalstm
2024-01-24 16:39:16,125: train_basin_file: basin_huc_17.txt
2024-01-24 16:39:16,125: validation_basin_file: basin_huc_17.txt
2024-01-24 16:39:16,126: test_basin_file: basin_huc_17.txt
2024-01-24 16:39:16,126: train_start_date: 1980-01-01 00:00:00
2024-01-24 16:39:16,127: train_end_date: 1999-12-31 00:00:00
2024-01-24 16:39:16,127: validation_start_date: 2000-01-01 00:00:00
2024-01-24 16:39:16,128: validation_end_date: 2004-12-31 00:00:00
2024-01-24 16:39:16,128: test_start_date: 2005-01-01 00:00:00
2024-01-24 16:39:16,128: test_end_date: 2009-12-31 00:00:00
2024-01-24 16:3

KeyboardInterrupt: 

### MCD Model

In [None]:
# Train the MCD model. When a CUDA-capable GPU is available no device override
# is passed; otherwise gpu=-1 is supplied so the run falls back to CPU-only mode.
device_kwargs = {} if torch.cuda.is_available() else {"gpu": -1}
start_run(config_file=Path("mcd_model_cudalstm.yml"), **device_kwargs)

### UMAL Model

In [3]:
# Train the UMAL model. When a CUDA-capable GPU is available no device override
# is passed; otherwise gpu=-1 is supplied so the run falls back to CPU-only mode.
device_kwargs = {} if torch.cuda.is_available() else {"gpu": -1}
start_run(config_file=Path("umal_model_cudalstm.yml"), **device_kwargs)

2024-01-24 21:14:50,662: Logging to C:\Users\Dwiva5\Downloads\DSAI\Project\hbv-runoff\Project-Run\runs\run_cudalstm_2401_211450\output.log initialized.
2024-01-24 21:14:50,662: ### Folder structure created at C:\Users\Dwiva5\Downloads\DSAI\Project\hbv-runoff\Project-Run\runs\run_cudalstm_2401_211450
2024-01-24 21:14:50,663: ### Run configurations for run_cudalstm
2024-01-24 21:14:50,663: experiment_name: run_cudalstm
2024-01-24 21:14:50,664: train_basin_file: basin_huc_17.txt
2024-01-24 21:14:50,664: validation_basin_file: basin_huc_17.txt
2024-01-24 21:14:50,664: test_basin_file: basin_huc_17.txt
2024-01-24 21:14:50,665: train_start_date: 1980-01-01 00:00:00
2024-01-24 21:14:50,665: train_end_date: 1999-12-31 00:00:00
2024-01-24 21:14:50,667: validation_start_date: 2000-01-01 00:00:00
2024-01-24 21:14:50,667: validation_end_date: 2004-12-31 00:00:00
2024-01-24 21:14:50,668: test_start_date: 2005-01-01 00:00:00
2024-01-24 21:14:50,668: test_end_date: 2009-12-31 00:00:00
2024-01-24 21:1

RuntimeError: Loss was NaN for 1 times in a row. Stopped training.

## Evaluate run on test set
The run directory that needs to be specified for evaluation is printed in the output log above. Since the folder name is created dynamically (including the date and time of the start of the run), you will need to change the `run_dir` argument to match your local directory name. By default, evaluation uses the same device as the training process.

In [None]:
# Folder of the finished training run; replace it with the run directory
# printed in your own training log before executing this cell.
run_dir = Path("runs/test_run_1601_104731")

# Evaluate the trained model on the test period.
eval_run(period="test", run_dir=run_dir)

### Load and inspect model predictions
Next, we load the results file and compare the model predictions with observations. The results file is always a pickled dictionary with one key per basin (even for a single basin). The next-lower dictionary level is the temporal resolution of the predictions. In this case, we trained a model only on daily data ('1D'). Within the temporal resolution, the next-lower dictionary level contains `xr` (an xarray Dataset that contains observations and predictions), as well as one key for each metric that was specified in the config file.

In [None]:
# Pickled results written by the evaluation step for epoch 50 of this run.
# NOTE: unpickling can execute arbitrary code — only load result files you produced yourself.
results_path = run_dir / "test" / "model_epoch050" / "test_results.p"
with results_path.open("rb") as results_file:
    results = pickle.load(results_file)

# Display the top-level keys of the results dictionary.
results.keys()

The data variables in the xarray Dataset are named according to the name of the target variables, with suffix `_obs` for the observations and suffix `_sim` for the simulations.

In [None]:
# Entry for basin 10258000 at the daily ('1D') resolution; display its `xr` entry.
basin_daily = results['10258000']['1D']
basin_daily['xr']

Let's plot the model predictions vs. the observations

In [None]:
# Extract observations and simulations for basin 10258000 at the daily resolution.
basin_daily = results['10258000']['1D']
qobs = basin_daily['xr']['QObs(mm/d)_obs']
qsim = basin_daily['xr']['QObs(mm/d)_sim']

# Only the observations are drawn in this figure; `qsim` is kept for the cells
# further down, which plot the individual simulation samples.
fig, ax = plt.subplots(figsize=(16, 10))
ax.plot(qobs['date'], qobs, label="Observations")
ax.set_xlabel("Date")
ax.set_ylabel("Discharge (mm/d)")
ax.set_title(f"Test period - NSE {basin_daily['NSE']:.3f}")
ax.legend()
# Render the figure explicitly so the cell does not end on the Text repr of set_title.
plt.show()

In [None]:
from neuralhydrology.evaluation.plots import percentile_plot, regression_plot, uncertainty_plot

In [None]:
# Observations taken out of the xarray object via `.values`, for use in the percentile plot below.
# NOTE(review): relies on `qobs` defined in an earlier plotting cell — run that cell first.
qobstest = qobs.values

In [None]:
# Extract observations and simulations for basin 10258000 at the daily resolution.
basin_daily = results['10258000']['1D']
qobs = basin_daily['xr']['QObs(mm/d)_obs']
qsim = basin_daily['xr']['QObs(mm/d)_sim']

# Plot observations
fig, ax = plt.subplots(figsize=(16, 10))
ax.plot(qobs['date'], qobs, label='Observations')

# Plot all simulation samples with low alpha for transparency. The number of
# samples is looked up by dimension name so it always agrees with
# `isel(samples=...)`, independent of the order of the array axes.
for i in range(qsim.sizes['samples']):
    # Only the first sample contributes a legend entry; labels starting with an
    # underscore are skipped by the legend.
    sample_label = 'Simulated samples' if i == 0 else '_nolegend_'
    ax.plot(qsim['date'], qsim.isel(samples=i), color='orange', alpha=0.2, label=sample_label)

ax.set_ylabel("Discharge (mm/d)")
ax.set_title(f"Test period - NSE {basin_daily['NSE']:.3f}")
ax.legend()
plt.show()


In [None]:
# Percentile plot of the simulations against the observations.
# NOTE(review): `qobstest` comes from `qobs.values` while `qsim` is passed as extracted
# from the results Dataset — confirm percentile_plot accepts this mix of input types.
percentile_plot(qobstest, qsim)

Next, we are going to compute all metrics that are implemented in the NeuralHydrology package. You will find additional hydrological signatures implemented in `neuralhydrology.evaluation.signatures`.

In [None]:
# Use the last time step of each prediction window for observations and simulations.
obs_series = qobs.isel(time_step=-1)
sim_series = qsim.isel(time_step=-1)

# Probabilistic runs carry an extra `samples` dimension (see the sample plot above).
# Collapse it to the sample mean so the simulated series has the same shape as the
# observations; deterministic runs without that dimension pass through unchanged.
if "samples" in sim_series.dims:
    sim_series = sim_series.mean(dim="samples")

# Compute every metric implemented in NeuralHydrology and print them to three decimals.
values = metrics.calculate_all_metrics(obs_series, sim_series)
for key, val in values.items():
    print(f"{key}: {val:.3f}")