ConvLSTM trained on gridded forcings for all stations

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets
import torch.autograd as autograd
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190724-162551'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)

CUDA Available
2019-07-24 16:25:52,241 - 20190724-162551 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
seq_len = 5*24
seq_steps = 2
stateful_lstm = False
validation_fraction, val_start, val_end = None, None, None

if stateful_lstm:
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(hours=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-09-30'
    val_start = '2012-10-01'
    val_end = '2012-12-31'
else:
    validation_fraction = 0.1
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(hours=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [5]:
rdrs_vars = list(range(8))
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers)

  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]
  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]


In [6]:
class ConvLSTMGrid(nn.Module):
    def __init__(self, input_size, input_dim, hidden_dim, kernel_size, num_layers, dropout=0.0):
        super(ConvLSTMGrid, self).__init__()
        self.conv_lstm = conv_lstm.ConvLSTM((input_size[0], input_size[1]), input_dim, hidden_dim, kernel_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout2d(p=dropout)
        self.conv_out = nn.Conv2d(hidden_dim[-1], 1, (1,1))
        
    def forward(self, input_tensor, hidden_state=None):
        convlstm_out, hidden = self.conv_lstm(input_tensor, hidden_state=hidden_state)
        convlstm_out = self.dropout(convlstm_out[-1][:,-1,:,:,:])  # last output of last layer
        return self.conv_out(convlstm_out)[:,0,:,:], hidden

In [7]:
# Train model
num_epochs = 800
learning_rate = 2e-3
patience = 250
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
num_conv_layers = 2
conv_hidden_dims = [16] * num_conv_layers
batch_size = 16
kernel_size = [(5,5)] * num_conv_layers
dropout = 0.3
weight_decay = 1e-5

model = ConvLSTMGrid((train_dataset.conv_height, train_dataset.conv_width), train_dataset.n_conv_vars, conv_hidden_dims, kernel_size, num_conv_layers, dropout=dropout).to(device)
model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter()
param_description = {'time_stamp': time_stamp, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_conv_layers': num_conv_layers, 'kernel_size': kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end,
                     'train len':len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset)}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190724-162551', 'H_conv': [16, 16], 'batch_size': 16, 'num_conv_layers': 2, 'kernel_size': [(5, 5), (5, 5)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 250, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.3, 'num_epochs': 800, 'seq_len': 120, 'seq_steps': 2, 'train_start': datetime.datetime(2010, 1, 11, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'validation_fraction': 0.1, 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 8, 'model': 'DataParallel((module):ConvLSTMGrid((conv_lstm):ConvLSTM((cell_list):ModuleList((0):ConvLSTMCell((conv):Conv2d(24,64,kernel_size=(5,5),stride=(1,1),padding=(2,2)))(1):Identity()(2):ConvLSTMCell((conv):Conv2d(32,64,kernel_size=(5,5),stride=(1,1),padding=(2,2)))(3):Identity()))(dropout):Dropout2d(p=0.3)(conv_out):Conv2d(16,1,kernel_size=(1,1),stride=(

In [8]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(validation_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=True)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=True)

In [9]:
for epoch in range(num_epochs):
    model.train()

    epoch_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        y_train = train_batch['y'].reshape((train_batch['y'].shape[0],-1)).to(device, non_blocking=True)
        mask = train_batch['mask'].sum(dim=0, dtype=torch.int8).reshape(-1).to(device, non_blocking=True) != 0
        if mask.sum() == 0:
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        y_pred, conv_hidden_states = model(train_batch['x_conv'].to(device), hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((train_batch['y'].shape[0], -1))
        loss = loss_fn(y_pred[:,mask], y_train[:,mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_losses += loss.detach()
        
    epoch_loss = (epoch_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(epoch_loss))
    writer.add_scalar('loss_nse', epoch_loss, epoch)
    
    # eval on validation split
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_val = val_batch['y'].reshape((val_batch['y'].shape[0],-1)).to(device, non_blocking=True)
        mask = val_batch['mask'].sum(dim=0, dtype=torch.int8).reshape(-1).to(device, non_blocking=True) != 0
        if not stateful_lstm:
            conv_hidden_states = None
        
        batch_pred, conv_hidden_states = model(val_batch['x_conv'], hidden_state=conv_hidden_states)
        batch_pred = batch_pred.detach().reshape((val_batch['y'].shape[0], -1))
        val_losses += loss_fn(batch_pred[:,mask], y_val[:,mask]).detach()
        
    val_nse = (val_losses / len(val_dataloader)).item()
    print('Epoch {} mean val loss:  \t{}'.format(epoch, val_nse))
    writer.add_scalar('loss_nse_val', val_nse, epoch)
    
    if val_nse < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_nse, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_VIC', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break

print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.pickle_model('ConvLSTM_VIC', model, 'allStations', time_stamp)

Epoch 0 mean train loss:	1.523316502571106
Epoch 0 mean val loss:  	1.45879328250885


  "type " + obj.__name__ + ". It won't be checked "


Saved model as ../pickle/models/ConvLSTM_VIC_allStations_20190724-162551.pkl
Epoch 1 mean train loss:	1.514652132987976
Epoch 1 mean val loss:  	1.4046987295150757
Saved model as ../pickle/models/ConvLSTM_VIC_allStations_20190724-162551.pkl
Epoch 2 mean train loss:	1.490653395652771
Epoch 2 mean val loss:  	1.4373416900634766
Epoch 3 mean train loss:	1.4812010526657104
Epoch 3 mean val loss:  	1.4948927164077759
Epoch 4 mean train loss:	1.4959542751312256
Epoch 4 mean val loss:  	1.4700312614440918
Epoch 5 mean train loss:	1.4414606094360352
Epoch 5 mean val loss:  	1.413620948791504
Epoch 6 mean train loss:	1.4369326829910278
Epoch 6 mean val loss:  	1.4388893842697144
Epoch 7 mean train loss:	1.4093172550201416
Epoch 7 mean val loss:  	1.372632384300232
Saved model as ../pickle/models/ConvLSTM_VIC_allStations_20190724-162551.pkl
Epoch 8 mean train loss:	1.3944332599639893
Epoch 8 mean val loss:  	1.3068500757217407
Saved model as ../pickle/models/ConvLSTM_VIC_allStations_20190724-162

In [10]:
logger.warning('predicting')
model.eval()

predictions = []
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    pred, conv_hidden_states = model(test_batch['x_conv'], hidden_state=conv_hidden_states)
    predictions.append(pred.detach())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-07-24 22:44:31,703 - 20190724-162551 - predicting


In [11]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_list = []
mse_list = []
predictions_df = pd.DataFrame(columns=actuals.columns)
for station in actuals['station'].unique():
    row, col = test_dataset.station_to_index[station]
    
    act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != act.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for station {}. Ignoring excess actuals.'.format(len(predictions), len(act), station))
        act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=act.index)
    pred['station'] = station
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    
    nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
    nse_list.append(nse)
    mse_list.append(mse)
    
    print(station, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')

print('Median NSE (clipped to 0)', np.median(nse_list), '/ Min', np.min(nse_list), '/ Max', np.max(nse_list))
print('Median MSE (clipped to 0)', np.median(mse_list), '/ Min', np.min(mse_list), '/ Max', np.max(mse_list))
writer.add_scalar('nse_median', np.median(nse_list))




To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


02GA047 	NSE: 0.16645254895514683 	MSE: 66.18573114721988 (clipped to 0)
04213000 	NSE: 0.277626723818334 	MSE: 131.27052359104374 (clipped to 0)
04176500 	NSE: -0.036825441640571555 	MSE: 717.0254781581547 (clipped to 0)
02GG003 	NSE: 0.33382735788421003 	MSE: 321.68399919685174 (clipped to 0)
04214500 	NSE: 0.5468631140503432 	MSE: 55.31954854312359 (clipped to 0)
02GC026 	NSE: 0.2528818806979475 	MSE: 121.72297639875367 (clipped to 0)
04174500 	NSE: -0.00076767795375976 	MSE: 95.89655863466695 (clipped to 0)
02GG013 	NSE: 0.28750740755305193 	MSE: 27.408445069993356 (clipped to 0)
04161820 	NSE: -0.24822344632365922 	MSE: 39.81499691115915 (clipped to 0)
04159492 	NSE: 0.22143020328871554 	MSE: 361.71027923053595 (clipped to 0)
04200500 	NSE: 0.24300945965272447 	MSE: 767.3464576061223 (clipped to 0)
02GB001 	NSE: -0.24611673001828138 	MSE: 9357.261412070704 (clipped to 0)
04208504 	NSE: 0.14134303428887518 	MSE: 848.0434743575244 (clipped to 0)
04199000 	NSE: 0.284125395200614 	MSE

In [12]:
writer.close()

In [13]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), actuals.rename({'runoff': 'actual'}, axis=1), 
                   on=['date', 'station'])[['date', 'station', 'prediction', 'actual']]
load_data.pickle_results('ConvLSTM_VIC', save_df, time_stamp)

'ConvLSTM_VIC_20190724-162551.pkl'

In [14]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190724-224450'