ConvLSTM trained on gridded forcings for all stations

In [None]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets
import torch.autograd as autograd
import pickle

# Identifier for this run, formatted as YYYYMMDD-HHMMSS from the current local time.
time_stamp = f'{datetime.now():%Y%m%d-%H%M%S}'
time_stamp

In [None]:
import logging

# Root logger: everything at DEBUG and above is appended to ../log.out.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Every record carries the run's time_stamp so runs sharing the log file can be told apart.
formatter = logging.Formatter(f'%(asctime)s - {time_stamp} - %(message)s')
fhandler = logging.FileHandler('../log.out', mode='a')
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    print('CUDA Available')
device = torch.device('cuda') if USE_CUDA else torch.device('cpu')

# Fixed seeds for both random number generators used in this notebook.
np.random.seed(0)
torch.manual_seed(0)

In [None]:
# Input sequence configuration and the fraction of training samples held out for validation.
seq_len = 50
seq_steps = 4
validation_fraction = 0.2

# First time for which to make a prediction in the train set: the nominal start
# shifted forward by seq_len * seq_steps hours.
train_start = datetime(2010, 1, 1) + timedelta(hours=seq_len * seq_steps)
train_end = '2010-12-31'

# Test period.
test_start = '2013-01-01'
test_end = '2014-12-31'

In [None]:
# Indices of the RDRS variables handed to the dataset.
rdrs_vars = [4, 5]

train_dataset = datasets.RdrsDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end)

# The test set reuses the scalers of the training set instead of creating its own.
train_scalers = {
    'conv_scalers': train_dataset.conv_scalers,
    'fc_scalers': train_dataset.fc_scalers,
}
test_dataset = datasets.RdrsDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, **train_scalers)

In [None]:
# Training hyper-parameters
num_epochs = 150
learning_rate = 2e-3
patience = 100  # epochs without a new best validation loss tolerated before early stopping
min_improvement = 0.05  # a new best must beat the previous best validation MSE by at least this much
best_loss_model = (-1, np.inf, None)  # (epoch, validation MSE, state dict) of the best model so far

# Model architecture
lstm_layers = 4
conv_hidden_dims = [16] * (lstm_layers - 1) + [1]  # last ConvLSTM layer has a single hidden channel
H_fc = 8
batch_size = 16
fc_layers = 1
kernel_size = (3, 3)
dropout = 0.0
pooling = [True] * lstm_layers
model = conv_lstm.ConvLSTMRegression(
    (train_dataset.conv_height, train_dataset.conv_width),
    train_dataset.n_fc_vars,
    train_dataset.n_conv_vars,
    conv_hidden_dims,
    kernel_size,
    lstm_layers,
    dropout,
    fc_layers,
    H_fc,
    pooling,
    batch_first=True,
).to(device)
loss_fn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Record the full run configuration as text in TensorBoard.
writer = SummaryWriter()
# The former 'x_conv_train', 'x_conv_val', 'x_fc_train', 'x_fc_val', 'y_train' and 'y_val'
# entries referenced arrays that are never created in this notebook (NameError).
# The dataset sizes and dimensions below describe the data instead.
param_description = {
    'H_conv': conv_hidden_dims, 'H_fc': H_fc, 'batch_size': batch_size, 'lstm_layers': lstm_layers,
    'fc_layers': fc_layers, 'kernel_size': kernel_size, 'loss': loss_fn,
    'optimizer': optimizer, 'lr': learning_rate, 'patience': patience,
    'min_improvement': min_improvement, 'pooling': pooling,
    'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps,
    'validation_fraction': validation_fraction, 'dropout': dropout,
    'train_start': train_start, 'train_end': train_end, 'test_start': test_start, 'test_end': test_end,
    'n_train_val_samples': len(train_dataset), 'n_test_samples': len(test_dataset),
    'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width,
    'n_conv_vars': train_dataset.n_conv_vars, 'n_fc_vars': train_dataset.n_fc_vars,
}
writer.add_text('Parameter Description', str(param_description))

In [None]:
# Hold out a random validation_fraction of the training samples for validation.
indices = list(range(len(train_dataset)))
n_val_samples = int(validation_fraction * len(train_dataset))
validation_indices = np.random.choice(indices, size=n_val_samples, replace=False)
train_indices = list(set(indices) - set(validation_indices))

# Both splits draw from train_dataset; the samplers restrict each loader to its own indices.
train_sampler = utils.data.SubsetRandomSampler(train_indices)
validation_sampler = utils.data.SubsetRandomSampler(validation_indices)

train_dataloader = utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
validation_dataloader = utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=validation_sampler)

# The test set is iterated in its stored order.
test_dataloader = utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
import copy

# Train for up to num_epochs epochs, evaluating on the validation split after each one.
# The weights with the best validation MSE are kept; training stops early once no
# improvement of at least min_improvement has been seen for more than `patience` epochs.
for epoch in range(num_epochs):
    model.train()

    epoch_losses = []
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x_conv'].to(device), train_batch['x_fc'].to(device))
        # reshape with -1 rather than batch_size: the last batch of an epoch can be smaller.
        loss = loss_fn(y_pred, train_batch['y'].reshape((-1, 1)).to(device))
        epoch_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if USE_CUDA and i % 100 == 0:
            logger.warning('epoch {} i {} cuda memory: {}'.format(epoch, i, torch.cuda.max_memory_allocated(device=device)))
            torch.cuda.reset_max_memory_allocated(device=device)

    epoch_loss = np.array(epoch_losses).mean()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(epoch_loss))
    writer.add_scalar('loss', epoch_loss, epoch)

    # eval on validation split
    model.eval()
    val_pred_batches = []
    val_y_batches = []
    with torch.no_grad():  # no gradients needed for evaluation
        for val_batch in validation_dataloader:
            batch_out = model(val_batch['x_conv'].to(device), val_batch['x_fc'].to(device))
            # flatten with -1 so a smaller final batch is handled as well
            val_pred_batches.append(batch_out.cpu().numpy().reshape(-1))
            val_y_batches.append(val_batch['y'].numpy())
    # The leading empty float64 array keeps the result dtype float64 (as with incremental
    # concatenation) and makes the call valid for an empty loader.
    val_pred = np.concatenate([np.array([])] + val_pred_batches)
    val_y = np.concatenate([np.array([])] + val_y_batches)

    val_nse, val_mse = evaluate.evaluate_daily('All Stations', pd.Series(val_pred), pd.Series(val_y))
    print('Epoch {} mean val mse:    \t{},\tnse: {}'.format(epoch, val_mse, val_nse))
    writer.add_scalar('loss_eval', val_mse, epoch)

    if val_mse < best_loss_model[1] - min_improvement:
        # state_dict() returns references to the live parameters; deep-copy them so
        # later optimizer steps do not overwrite the stored best weights.
        best_loss_model = (epoch, val_mse, copy.deepcopy(model.state_dict()))  # new best model
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break

if best_loss_model[2] is None:
    # Happens when no epoch ran or the validation loss never compared below infinity (e.g. NaN).
    raise RuntimeError('No epoch improved the validation loss; there is no best model to restore.')

print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.pickle_model('ConvLSTM+LinearLayer_VIC', model, 'allStations', time_stamp)

In [None]:
# Predict runoff for every sample of the test set with the trained model.
model.eval()

predict = test_dataset.data_runoff.copy()
predict['runoff'] = np.nan
batch_preds = []
with torch.no_grad():  # inference only, no gradients needed
    for test_batch in test_dataloader:
        batch_out = model(test_batch['x_conv'].to(device), test_batch['x_fc'].to(device))
        # flatten with -1 rather than batch_size: the last batch can be smaller
        batch_preds.append(batch_out.cpu().numpy().reshape(-1))

# One concatenation at the end instead of one per batch. The leading empty float64
# array keeps the result dtype float64 and makes the call valid for an empty loader.
pred_array = np.concatenate([np.array([])] + batch_preds)

# NOTE(review): assumes test_dataset yields samples in the row order of data_runoff — confirm.
predict['runoff'] = pred_array

In [None]:
# Score the predictions station by station against the test set's runoff values.
actuals = test_dataset.data_runoff.copy()

nse_list, mse_list = [], []
for station in predict['station'].unique():
    station_pred = predict.loc[predict['station'] == station, 'runoff']
    station_actual = actuals.loc[actuals['station'] == station, 'runoff']
    nse, mse = evaluate.evaluate_daily(station, station_pred, station_actual, writer=writer)
    nse_list.append(nse)
    mse_list.append(mse)

    print(station, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')

# Summary statistics over all stations.
for label, scores in (('Median NSE (clipped to 0)', nse_list), ('Median MSE (clipped to 0)', mse_list)):
    print(label, np.median(scores), '/ Min', np.min(scores), '/ Max', np.max(scores))

In [None]:
# Close the TensorBoard SummaryWriter now that the losses and parameter description are logged.
writer.close()

In [None]:
# Put predictions and observations side by side, matched on date and station, and persist them.
predicted = predict.rename(columns={'runoff': 'prediction'})
observed = actuals.rename(columns={'runoff': 'actual'})
save_df = predicted.merge(observed, on=['date', 'station'])
save_df = save_df[['date', 'station', 'prediction', 'actual']]
load_data.pickle_results('ConvLSTM+LinearLayer_VIC', save_df, time_stamp)

In [None]:
# Display the current time (same YYYYMMDD-HHMMSS format as time_stamp) to mark when the run finished.
datetime.now().strftime('%Y%m%d-%H%M%S')