ConvLSTM trained on gridded forcings for all stations

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets
import torch.autograd as autograd
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190723-134412'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)

CUDA Available
2019-07-23 13:44:13,047 - 20190723-134412 - cuda devices: ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']


In [4]:
seq_len = 50
seq_steps = 1

train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(hours=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-09-30'
val_start = '2012-10-01'
val_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [5]:
rdrs_vars = [4,5]
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end)
val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers)

  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]
  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]
  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]


In [6]:
class ConvLSTMGrid(nn.Module):
    def __init__(self, input_size, input_dim, hidden_dim, kernel_size, num_layers, dropout=0.0):
        super(ConvLSTMGrid, self).__init__()
        self.conv = conv_lstm.ConvLSTM((input_size[0], input_size[1]), input_dim, hidden_dim, kernel_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout2d(p=dropout)
        self.linear = nn.Linear(input_size[0] * input_size[1], input_size[0] * input_size[1])
        
    def forward(self, input_tensor, hidden_state=None):
        conv_out, hidden = self.conv(input_tensor, hidden_state=hidden_state)
        conv_out = self.dropout(conv_out[-1][:,-1,0,:,:])  # last output of last layer (which has only 1 dimension anyways)
        return self.linear(conv_out.reshape(input_tensor.shape[0], -1)).reshape((input_tensor.shape[0], conv_out.shape[1], conv_out.shape[2])), hidden

In [7]:
# Train model
num_epochs = 100
learning_rate = 2e-3
patience = 50
min_improvement = 0.05
best_loss_model = (-1, np.inf, None)

# Prepare model
stateful_lstm = True
num_conv_layers = 3
conv_hidden_dims = [8] * (num_conv_layers - 1) + [1]
batch_size = 16
kernel_size = [(3,3), (3,3), (1,1)]
dropout = 0.2

model = ConvLSTMGrid((train_dataset.conv_height, train_dataset.conv_width), train_dataset.n_conv_vars, conv_hidden_dims, kernel_size, num_conv_layers, dropout=dropout).to(device)
model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

writer = SummaryWriter()
param_description = {'time_stamp': time_stamp, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_conv_layers': num_conv_layers, 'kernel_size': kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'val_start': val_start, 
                     'val_end': val_end, 'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''),
                     'train len':len(train_dataset), 'val len': len(val_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset)}
writer.add_text('Parameter Description', str(param_description))

In [8]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=True)



In [9]:
for epoch in range(num_epochs):
    model.train()

    epoch_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        y_train = train_batch['y'].reshape(-1).to(device, non_blocking=True)
        mask = train_batch['mask'].reshape(-1).to(device, non_blocking=True)
        if mask.sum() == 0:
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        y_pred, conv_hidden_states = model(train_batch['x_conv'].to(device), hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape(-1) * mask  # ignore grid cells that have no target value
        loss = loss_fn(y_pred, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_losses += (loss * y_train.shape[0] / mask.sum()).detach()  # only average over losses where we had a target
        
    epoch_loss = (epoch_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(epoch_loss))
    writer.add_scalar('loss', epoch_loss, epoch)
    
    # eval on validation split
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_val = val_batch['y'].reshape(-1).to(device, non_blocking=True)
        mask = val_batch['mask'].reshape(-1).to(device, non_blocking=True)
        if not stateful_lstm:
            conv_hidden_states = None
        
        batch_pred, conv_hidden_states = model(val_batch['x_conv'], hidden_state=conv_hidden_states)
        batch_pred = batch_pred.detach().reshape(-1)
        val_losses += (loss_fn(batch_pred * mask, y_val).detach() * y_val.shape[0] / mask.sum())
        
    val_mse = (val_losses / len(val_dataloader)).item()
    print('Epoch {} mean val mse:    \t{}'.format(epoch, val_mse))
    writer.add_scalar('loss_eval', val_mse, epoch)

    if val_mse < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_mse, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_VIC', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break

print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.pickle_model('ConvLSTM_VIC', model, 'allStations', time_stamp)

Epoch 0 mean train loss:	3542.933349609375
Epoch 0 mean val mse:    	739.79638671875


  "type " + obj.__name__ + ". It won't be checked "


Saved model as ../pickle/models/ConvLSTM_VIC_allStations_20190723-134412.pkl
Epoch 1 mean train loss:	2932.011962890625
Epoch 1 mean val mse:    	1029.6136474609375
Epoch 2 mean train loss:	2896.7138671875
Epoch 2 mean val mse:    	1028.246337890625
Epoch 3 mean train loss:	2891.169921875
Epoch 3 mean val mse:    	1034.139404296875
Epoch 4 mean train loss:	2885.984130859375
Epoch 4 mean val mse:    	1037.7230224609375
Epoch 5 mean train loss:	2898.70947265625
Epoch 5 mean val mse:    	1017.8292846679688
Epoch 6 mean train loss:	2901.99951171875
Epoch 6 mean val mse:    	1017.2156982421875
Epoch 7 mean train loss:	2901.133544921875
Epoch 7 mean val mse:    	1019.3958129882812
Epoch 8 mean train loss:	2894.230712890625
Epoch 8 mean val mse:    	1004.3074340820312
Epoch 9 mean train loss:	2805.563232421875
Epoch 9 mean val mse:    	1186.6883544921875
Epoch 10 mean train loss:	2738.604248046875
Epoch 10 mean val mse:    	1287.025634765625
Epoch 11 mean train loss:	2601.6396484375
Epoch 11 

In [10]:
logger.warning('predicting')
model.eval()

predictions = []
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    pred, conv_hidden_states = model(test_batch['x_conv'], hidden_state=conv_hidden_states)
    predictions.append(pred.detach())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-07-23 13:58:25,767 - 20190723-134412 - predicting


In [11]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_list = []
mse_list = []
predictions_df = pd.DataFrame(columns=actuals.columns)
for station in actuals['station'].unique():
    row, col = test_dataset.station_to_row_col[station]
    
    act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != act.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for station {}. Ignoring excess actuals.'.format(len(predictions), len(act), station))
        act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=act.index)
    pred['station'] = station
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    
    nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
    nse_list.append(nse)
    mse_list.append(mse)
    
    print(station, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')

print('Median NSE (clipped to 0)', np.median(nse_list), '/ Min', np.min(nse_list), '/ Max', np.max(nse_list))
print('Median MSE (clipped to 0)', np.median(mse_list), '/ Min', np.min(mse_list), '/ Max', np.max(mse_list))
writer.add_scalar('nse_median', np.median(nse_list))




To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


02GA047 	NSE: 0.010180937309954174 	MSE: 78.59408397863525 (clipped to 0)
04213000 	NSE: -0.07811303901701216 	MSE: 195.9159727920268 (clipped to 0)
04176500 	NSE: 0.022720099883178135 	MSE: 675.8462510013679 (clipped to 0)
02GG003 	NSE: 0.11120106318494416 	MSE: 429.18663781885175 (clipped to 0)
04214500 	NSE: 0.08382365242162226 	MSE: 111.8480165826856 (clipped to 0)
02GC026 	NSE: 0.09124759651618286 	MSE: 148.05697319308405 (clipped to 0)
04174500 	NSE: 0.006706003531232407 	MSE: 95.1804080729224 (clipped to 0)
02GG013 	NSE: 0.010034235869531227 	MSE: 38.082392090784154 (clipped to 0)
04161820 	NSE: -0.9838320538191772 	MSE: 63.27894843483675 (clipped to 0)
04159492 	NSE: 0.049671628779830224 	MSE: 441.5063902642216 (clipped to 0)
04200500 	NSE: 0.00574537585500956 	MSE: 1007.8564039203993 (clipped to 0)
02GB001 	NSE: -0.14444229300420042 	MSE: 8593.774121396165 (clipped to 0)
04208504 	NSE: -0.07041752689633052 	MSE: 1057.1865538533905 (clipped to 0)
04199000 	NSE: -0.0156618874304

In [12]:
writer.close()

In [13]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), actuals.rename({'runoff': 'actual'}, axis=1), 
                   on=['date', 'station'])[['date', 'station', 'prediction', 'actual']]
load_data.pickle_results('ConvLSTM_VIC', save_df, time_stamp)

'ConvLSTM_VIC_20190723-134412.pkl'

In [14]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190723-135839'