In [19]:
# import libraries
import yaml
import pickle
from pathlib import Path
import matplotlib.pyplot as plt
from neuralhydrology.evaluation.plots import percentile_plot_plotly
from neuralhydrology.utils.configutils import create_config_files
from neuralhydrology.nh_run_scheduler import schedule_runs
from neuralhydrology.evaluation.tuning import generate_run_dir_patterns
from neuralhydrology.evaluation.tuning import get_best_model
from neuralhydrology.nh_run import eval_run
from neuralhydrology.evaluation import metrics, get_tester
from neuralhydrology.utils.config import Config

### Perform two-stage hyperparameter tuning

1. **First stage**:
   - All models are run for 100 epochs using the Adam optimizer with:
     - Learning rate: 5e−3
     - Batch size: 256
   - During training, the model is validated after every 4 epochs on validation period data.

In [2]:
# Search grid of the first tuning stage
grid1 = dict(
    hidden_size=[128, 256, 512],  # number of cells in the LSTM (see Kratzert et al., 2019)
    n_distributions=[1, 3, 5],  # number of density heads in the mixture
    target_noise_std=[0.05, 0.1, 0.2],  # amount of relative noise of the output
    output_dropout=[0.0, 0.2, 0.4],  # dropout rate on the head layer
)
# Create configs for all possible parameter combinations
# create_config_files(base_config_path=Path('base_stage1.yml'), modify_dict=grid1, output_dir=Path('configs_stage1'))

### Train the models with the negative log-likelihood as loss

In [3]:
# Train models for all configs from one call of the first stage
# schedule_runs(mode='train', directory=Path('configs_stage1'), gpu_ids=[0], runs_per_gpu=1)

In [3]:
# Get run directories for the first stage.
# The patterns are derived from the stage-1 grid; the second argument is presumably the
# experiment-name prefix of the run directories — TODO confirm against generate_run_dir_patterns.
run_patterns = generate_run_dir_patterns(grid1, 'single_basin_multi_forcing_mdn')

In [None]:
# Pick the best stage-1 model among the runs below 'runs' that match the patterns
best_model = get_best_model(
    parent_dir='runs',
    grid_params=list(grid1),
    run_patterns=run_patterns,
)

In [None]:
# Print epoch, median NSE, and tuned hyperparameters of the best model
def print_grid_parameters(best_model, grid_params):
    """Print the best epoch, the best median NSE and the tuned hyperparameters.

    Parameters
    ----------
    best_model : mapping
        Search result with the keys 'best_model_config', 'best_epoch' and
        'best_median_nse' (the older key 'best_nse' is accepted as a fallback).
    grid_params : iterable of str
        Hyperparameter names to report; names missing from the config are skipped.

    Nothing is printed when ``best_model['best_model_config']`` is None.
    """
    config = best_model['best_model_config']
    if config is None:
        return
    # The stage-2 cell of this notebook reads the score from 'best_median_nse';
    # reading 'best_nse' unconditionally here would raise a KeyError on such a result.
    nse_key = 'best_median_nse' if 'best_median_nse' in best_model else 'best_nse'
    params = {k: config.get(k) for k in grid_params if k in config}
    print(f"Best epoch: {best_model['best_epoch']}")
    print(f"Best median NSE score: {best_model[nse_key]}")
    print(f"Parameters: {params}")
# Report the stage-1 winner for the hyperparameters contained in grid1
print_grid_parameters(best_model, list(grid1))

In [4]:
# Best stage-1 hyperparameters entered by hand (presumably taken from an earlier stage-1 search)
best_params1 = dict(hidden_size=256, n_distributions=5, target_noise_std=0.1, output_dropout=0.0)

In [None]:
# Keep only those entries of the best model's config whose key is part of the stage-1 grid
best_params1 = dict(
    entry for entry in best_model['best_model_config'].items() if entry[0] in grid1
)

In [5]:
# Build the base config of the second stage:
# start from the stage-1 base config and overwrite the tuned hyperparameters
# with the best values found in the first stage.
with open('base_stage1.yml', 'r') as stage1_file:
    base_config = yaml.safe_load(stage1_file)

# best stage-1 values take precedence over the values from the base file
base_config.update(best_params1)

# persist the merged config as the starting point for stage 2
with open('base_stage2.yml', 'w') as stage2_file:
    yaml.dump(base_config, stage2_file)

# Search grid of the second tuning stage
grid2 = dict(
    learning_rate=[0.001, 0.0001, 0.0005],  # learning rate
    batch_size=[128, 256],  # batch size
)

# create_config_files(base_config_path=Path('base_stage2.yml'), modify_dict=grid2, output_dir=Path('configs_stage2'))

In [6]:
# Train models for all configs of stage 2 found in 'configs_stage2', using GPU 0.
# NOTE: the create_config_files call that would write 'configs_stage2' is commented out in the
# previous cell, so the directory must already exist before this cell is run.
# runs_per_gpu=2 presumably allows two trainings on the GPU at the same time — TODO confirm.
schedule_runs(mode='train', directory=Path('configs_stage2'), gpu_ids=[0], runs_per_gpu=2)

Done


In [7]:
# Get run directories for the second stage.
# This overwrites the stage-1 value of run_patterns. The second argument is presumably the
# experiment-name prefix of the run directories — TODO confirm against generate_run_dir_patterns.
run_patterns = generate_run_dir_patterns(grid2, 'single_basin_multi_forcing_mdn')

In [8]:
# Pick the best stage-2 model among the runs below 'runs' that match the patterns
best_model = get_best_model(
    parent_dir='runs',
    grid_params=list(grid2),
    run_patterns=run_patterns,
)

Evaluating.
Finished.


In [9]:
# Report the best epoch, its median NSE, and the tuned hyperparameters
def print_grid_parameters(best_model, grid_params):
    """Print a three-line summary of a hyperparameter-search result.

    ``best_model`` is indexed with the keys 'best_model_config', 'best_epoch' and
    'best_median_nse'. ``grid_params`` lists the hyperparameter names to report;
    names that are not contained in the config are left out. If the config is
    None, nothing is printed.
    """
    cfg = best_model['best_model_config']
    if cfg is None:
        return

    tuned = {}
    for name in grid_params:
        if name in cfg:
            tuned[name] = cfg.get(name)

    print(f"Best epoch: {best_model['best_epoch']}")
    print(f"Best median NSE score: {best_model['best_median_nse']}")
    print(f"Parameters: {tuned}")
# Report the stage-2 winner for the hyperparameters contained in grid2
print_grid_parameters(best_model, list(grid2))

Best epoch: 72
Best median NSE score: 0.8960563093423843
Parameters: {'learning_rate': 0.001, 'batch_size': 128}


In [10]:
# run directory of the best stage-2 model
best_model_dir = best_model['best_model_dir']
# best epoch of that run, stored as a string
best_epoch = str(best_model['best_epoch'])
# entries of the best model's config whose key is part of the stage-2 grid
best_params2 = dict(
    entry for entry in best_model['best_model_config'].items() if entry[0] in grid2
)

In [11]:
# Build the final config:
# start from the stage-2 base config and overwrite the tuned hyperparameters
# with the best values found in the second stage.
with open('base_stage2.yml', 'r') as stage2_file:
    base_config = yaml.safe_load(stage2_file)

# best stage-2 values take precedence over the values from the base file
base_config.update(best_params2)

# persist the merged config as the final configuration
with open('base_final.yml', 'w') as final_file:
    yaml.dump(base_config, final_file)

In [12]:
# Evaluate the best run on the test period, using the weights of the best epoch
eval_run(run_dir=best_model_dir, period="test", epoch=best_epoch)

# Evaluation: 100%|██████████| 1/1 [00:01<00:00,  1.45s/it]


In [20]:
# the metrics to compute are read from the final config file
config = Config(Path("base_final.yml"))
# the tester itself is built from the config stored in the best run's directory
tester = get_tester(
    cfg=Config(best_model_dir / "config.yml"),
    run_dir=best_model_dir,
    period="test",
    init_model=True,
)
# evaluate on the test period without writing the results to disk
results = tester.evaluate(save_results=False, metrics=config.metrics)

results.keys()

# Evaluation: 100%|██████████| 1/1 [00:01<00:00,  1.08s/it]


dict_keys(['DE4'])

In [29]:
from neuralhydrology.modelzoo.cudalstm import CudaLSTM
import torch

In [32]:
cmallstm_config = Config(Path("base_final.yml"))

# create a new model instance with random weights
cuda_lstm = CudaLSTM(cfg=cmallstm_config)

# Load the trained weights of the *best* epoch into the new model.
# The file name is derived from best_epoch (set when the best stage-2 model was selected)
# instead of a hard-coded 'model_epoch030.pt', so that the inspected weights belong to the
# same epoch that eval_run evaluated above.
model_path = best_model_dir / f"model_epoch{int(best_epoch):03d}.pt"
# load the weights from the file, creating the weight tensors on CPU
model_weights = torch.load(str(model_path), map_location='cpu')
# set the new model's weights to the values loaded from file
cuda_lstm.load_state_dict(model_weights)
cuda_lstm

CudaLSTM(
  (embedding_net): InputLayer(
    (statics_embedding): Identity()
    (dynamics_embedding): Identity()
  )
  (lstm): LSTM(12, 256)
  (dropout): Dropout(p=0.0, inplace=False)
  (head): CMAL(
    (fc1): Linear(in_features=256, out_features=100, bias=True)
    (fc2): Linear(in_features=100, out_features=20, bias=True)
    (_softplus): Softplus(beta=2, threshold=20.0)
  )
)

In [37]:
from neuralhydrology.datasetzoo import get_dataset
from neuralhydrology.datautils.utils import load_scaler
from torch.utils.data import DataLoader

In [46]:
# switch to evaluation mode so that dropout is deactivated
cuda_lstm.eval()

# build the test-period dataset with the scaler loaded from the best run's directory
scaler = load_scaler(best_model_dir)
dataset = get_dataset(cmallstm_config, is_train=False, period='test', scaler=scaler)
dataloader = DataLoader(dataset, batch_size=256, shuffle=False, collate_fn=dataset.collate_fn)

customlstm_output = []
# pure inference: gradients are not needed
with torch.no_grad():
    # one model-output dict per batch, in dataloader order
    cudalstm_output = [cuda_lstm(batch) for batch in dataloader]

print('CudaLSTM output:  ', list(cudalstm_output[0].keys()))

CudaLSTM output:   ['lstm_output', 'h_n', 'c_n', 'mu', 'b', 'tau', 'pi']


In [47]:
# Show the shape of the 'pi' output (mixture component weights) of the first batch
print('CudaLSTM shape:  ', cudalstm_output[0]['pi'].shape)  # [batch size, sequence length, number of mixture components]

CudaLSTM shape:   torch.Size([256, 365, 5])


In [48]:
import pandas as pd

In [50]:
# Take the final time step of every sample and stack all batches along the first axis.
mix_comp_weights = torch.cat(
    tuple(batch_out['pi'][:, -1, :] for batch_out in cudalstm_output),
    dim=0,
)

# Daily date index spanning the test period defined in the config
date_range = pd.date_range(
    start=cmallstm_config.test_start_date,
    end=cmallstm_config.test_end_date,
    freq='1D',
)

In [57]:
# Plot the mixture component weights over the test period.
# The explicit Figure/Axes interface is used instead of the pyplot state machine; the bare
# `mix_comp_weights` expression that preceded the plot was dropped because an expression
# that is not the last statement of a cell displays nothing.
fig, ax = plt.subplots(figsize=(15, 5))
# one line per mixture component (columns of the weight matrix)
ax.plot(date_range, mix_comp_weights.numpy())
ax.set_xlabel('Date')
ax.set_ylabel('Mixture component weights')
ax.set_title('Mixture component weights over time')
ax.legend([f'Mixture component {i}' for i in range(mix_comp_weights.shape[1])])
plt.show()

TypeError: percentile_plot_plotly() got an unexpected keyword argument 'plot_title'

In [24]:
# Pull observed and simulated discharge of basin 'DE4' from the '1D' results
qobs, qsim = (
    results['DE4']['1D']['xr'][variable]
    for variable in ('discharge_vol_obs', 'discharge_vol_sim')
)

In [25]:
# Create the percentile plot from the observed and simulated discharge and display it
fig = percentile_plot_plotly(qobs, qsim, title="Percentile Plot - Test Period")
fig.show()