In [16]:
import pickle
from pathlib import Path
from typing import Dict
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

from neuralhydrology.datasetzoo import get_dataset#, camelsus
from neuralhydrology.datautils.utils import load_scaler
from neuralhydrology.modelzoo.cudalstm import CudaLSTM
from neuralhydrology.modelzoo.customlstm import CustomLSTM
from neuralhydrology.nh_run import start_run
from neuralhydrology.utils.config import Config

In [15]:
# set device type
if torch.cuda.is_available():
    device = 'cuda'
    print('running on GPU')
    print('no. GPU available:\t{0}'.format(torch.cuda.device_count()))
    !nvidia-smi
else:
    device = 'cpu'
    print('running on CPU')

running on GPU
no. GPU available:	1
Thu May 16 15:11:27 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.60.13    Driver Version: 525.60.13    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 8000     On   | 00000000:A1:00.0 Off |                  Off |
| 33%   33C    P8    24W / 260W |      3MiB / 49152MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------

## Training data

In [2]:
with open('./train_data/train_data.p', 'rb') as file:
    # Load the object from the pickle file
    train_data = pickle.load(file)

In [6]:
type(train_data)

dict

In [7]:
train_data.keys()

dict_keys(['coords', 'attrs', 'dims', 'data_vars'])

In [11]:
train_data['coords'].keys()

dict_keys(['date', 'basin'])

In [13]:
train_data['coords']['basin'].keys()

dict_keys(['dims', 'attrs', 'data'])

In [16]:
train_data['dims']

{'date': 11686, 'basin': 203}

In [18]:
train_data['data_vars'].keys()

dict_keys(['dayofweek', 'dayofyear', 'evapo_point_emo1', 'inflow_efas5', 'month', 'precip_point_emo1', 'temp_areal_emo1', 'volume', 'weekofyear', 'year'])

## States and activations

In [3]:
config_file = Path("config_V_lstm064_fc00_do04.yml")
cfg = Config(config_file)

In [4]:
# find the directory of the last run
root_run_dir = cfg.run_dir if cfg.run_dir else Path('./runs/')
run_dir = max([x for x in root_run_dir.iterdir() if x.is_dir() & x.stem.startswith(cfg.experiment_name)])

print(run_dir)

runs/V_lstm064_fc00_do04_1605_090614


In [26]:
# create a new model instance with random weights
cuda_lstm = CudaLSTM(cfg=cfg)

# load the trained weights into the new model.
model_path = max(run_dir.glob('model_epoch*.pt'))
model_weights = torch.load(f=model_path)#, map_location=device)  # load the weights from the file, creating the weight tensors on CPU
cuda_lstm.load_state_dict(model_weights)  # set the new model's weights to the values loaded from file
cuda_lstm.to(device)

print('no. parameters:\t{0}'.format(sum(p.numel() for p in cuda_lstm.parameters())))
print('device:\t\t{0}'.format(next(cuda_lstm.parameters()).device))

cuda_lstm

no. parameters:	22593
device:		cuda:0


CudaLSTM(
  (embedding_net): InputLayer(
    (statics_embedding): Identity()
    (dynamics_embedding): Identity()
  )
  (lstm): LSTM(22, 64)
  (dropout): Dropout(p=0.4, inplace=False)
  (head): Regression(
    (net): Sequential(
      (0): Linear(in_features=64, out_features=1, bias=True)
    )
  )
)

In [27]:
custom_lstm = CustomLSTM(cfg=cfg)  # create a new CustomLSTM (with random weights)
custom_lstm.copy_weights(cuda_lstm)  # copy the CudaLSTM weights into the CustomLSTM

print('no. parameters:\t{0}'.format(sum(p.numel() for p in custom_lstm.parameters())))
print('device:\t\t{0}'.format(next(custom_lstm.parameters()).device))

custom_lstm

no. parameters:	22593
device:		cuda:0


CustomLSTM(
  (embedding_net): InputLayer(
    (statics_embedding): Identity()
    (dynamics_embedding): Identity()
  )
  (cell): _LSTMCell()
  (dropout): Dropout(p=0.4, inplace=False)
  (head): Regression(
    (net): Sequential(
      (0): Linear(in_features=64, out_features=1, bias=True)
    )
  )
)

In [35]:
# make sure we're in eval mode where dropout is deactivated
custom_lstm.eval()
cuda_lstm.eval()

# load the dataset
scaler = load_scaler(run_dir)
dataset = get_dataset(cfg, is_train=False, period='test', scaler=scaler)
dataloader = DataLoader(dataset, batch_size=1000, shuffle=False, collate_fn=dataset.collate_fn)

cudalstm_output = []
customlstm_output = []
# no need to calculate any gradients since we're just running some evaluations
with torch.inference_mode():#torch.no_grad():
    for sample in tqdm(dataloader):
        sample = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in sample.items()}
        customlstm_output.append(custom_lstm(sample).cpu())
        cudalstm_output.append(cuda_lstm(sample).cpu())

print('CudaLSTM output:  ', list(cudalstm_output[0].keys()))
print('CustomLSTM output:', list(customlstm_output[0].keys()))

# check if predictions of CustomLSTM and CudaLSTM are identical
print('Identical predictions:', torch.allclose(customlstm_output[0]['y_hat'], cudalstm_output[0]['y_hat'], atol=1e-5))

  0%|          | 0/2299 [00:00<?, ?it/s]

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [37]:
# Make sure we're in eval mode where dropout is deactivated
custom_lstm.eval()
cuda_lstm.eval()

# Load the dataset
scaler = load_scaler(run_dir)
dataset = get_dataset(cfg, is_train=False, period='test', scaler=scaler)
dataloader = DataLoader(dataset, batch_size=1000, shuffle=False, collate_fn=dataset.collate_fn)

# Create lists to store the outputs
cudalstm_output = []
customlstm_output = []

# No need to calculate any gradients since we're just running some evaluations
with torch.inference_mode():  # equivalent to torch.no_grad()
    for sample in tqdm(dataloader):
        # Move the sample to the same device as the models
        sample = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in sample.items()}
        
        # Forward pass through the models and collect the outputs
        customlstm_output.append(custom_lstm(sample).cpu())  # Move output to CPU if needed
        cudalstm_output.append(cuda_lstm(sample).cpu())      # Move output to CPU if needed

  0% 0/2299 [00:00<?, ?it/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [None]:
print('CudaLSTM shape:  ', cudalstm_output[0]['c_n'].shape)  # [batch size, 1, hidden size]
print('CustomLSTM shape:', customlstm_output[0]['c_n'].shape)  # [batch size, sequence length, hidden size]