ConvLSTM trained on gridded forcings and landcover data for all stations.
Test generalization by training on a subset of stations

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets
import torch.autograd as autograd
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190731-090931'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)

CUDA Available
2019-07-31 09:09:32,279 - 20190731-090931 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
seq_len = 5*24
seq_steps = 2
stateful_lstm = False
validation_fraction, val_start, val_end = None, None, None

if stateful_lstm:
    val_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(hours=seq_len * seq_steps)  # first day for which to make a prediction in train set
    val_end = '2010-09-30'
    train_start = '2010-10-01'
    train_end = '2012-12-31'
else:
    validation_fraction = 0.1
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(hours=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [5]:
exclude_downstream_stations = ['02GB001', '02GB007', '02GC026', '02GG009', '02GG003', '04165500', '04164000', '04166500', '04198000', '04208504']

In [6]:
rdrs_vars = list(range(8))
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end, exclude_stations=exclude_downstream_stations)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers, exclude_stations=exclude_downstream_stations)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers)

  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]
  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]


In [7]:
landcover_types = None
landcover, landcover_legend = load_data.load_landcover_reduced(values_to_use=landcover_types)
landcover = torch.from_numpy(landcover).float().to(device)

  rdrs_data[:,i,:,:] = rdrs_nc[forcing_variables[i]][:]
cannot be safely cast to variable data type
  landcover_fullres = np.array(landcover_nc['Band1'][:])[::-1,:]


In [8]:
stations = train_dataset.data_runoff['station'].unique()
np.random.seed(2)
test_stations = np.random.choice(stations, size=int(0.2*(len(stations)-len(exclude_downstream_stations))), replace=False)
train_stations = list(s for s in stations if s not in test_stations)

train_station_indices = list(train_dataset.station_to_index[s] for s in train_stations)
test_station_indices = list(test_dataset.station_to_index[s] for s in test_stations)

train_mask = torch.zeros((train_dataset.conv_height, train_dataset.conv_width), dtype=torch.int8)
test_mask = torch.zeros((train_dataset.conv_height, train_dataset.conv_width), dtype=torch.int8)
for row in range(train_dataset.conv_height):
    for col in range(train_dataset.conv_width):
        train_mask[row, col] = 1 if (row, col) in train_station_indices else 0
        test_mask[row, col] = 1 if (row, col) in test_station_indices else 0
train_mask = train_mask.reshape(-1).to(device)
test_mask = test_mask.reshape(-1).to(device)

In [9]:
# Train model
num_epochs = 400
learning_rate = 2e-3
patience = 100
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
batch_size = 32
num_convlstm_layers = 2
num_conv_layers = 2
convlstm_hidden_dims = [8] * num_convlstm_layers
conv_hidden_dims = [4] * (num_conv_layers - 1)
convlstm_kernel_size = [(5,5)] * num_convlstm_layers
conv_kernel_size = [(3,3)] * num_conv_layers
conv_activation = nn.Sigmoid
dropout = 0.2
weight_decay = 1e-5

model = conv_lstm.ConvLSTMGridWithGeophysicalInput((train_dataset.conv_height, train_dataset.conv_width), 
                                         train_dataset.n_conv_vars, landcover.shape[0], convlstm_hidden_dims, 
                                         conv_hidden_dims, convlstm_kernel_size, conv_kernel_size, 
                                         num_convlstm_layers, num_conv_layers, conv_activation, dropout=dropout).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter()
param_description = {'time_stamp': time_stamp, 'H_convlstm': convlstm_hidden_dims, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_convlstm_layers': num_convlstm_layers, 'num_conv_layers': num_conv_layers, 'convlstm_kernel_size': convlstm_kernel_size, 'conv_kernel_size': conv_kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout, 'landcover_shape': landcover.shape, 'conv_activation': conv_activation,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction, 'landcover_types': landcover_types,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end, 'train_stations': train_stations, 'test_stations': test_stations,
                     'train len':len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset)}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190731-090931', 'H_convlstm': [8, 8], 'H_conv': [4], 'batch_size': 32, 'num_convlstm_layers': 2, 'num_conv_layers': 2, 'convlstm_kernel_size': [(5, 5), (5, 5)], 'conv_kernel_size': [(3, 3), (3, 3)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 100, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.2, 'landcover_shape': torch.Size([19, 34, 39]), 'conv_activation': <class 'torch.nn.modules.activation.Sigmoid'>, 'num_epochs': 400, 'seq_len': 120, 'seq_steps': 2, 'train_start': datetime.datetime(2010, 1, 11, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'validation_fraction': 0.1, 'landcover_types': None, 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 8, 'model': 'ConvLSTMGridWithGeophysicalInput((conv_lstm):ConvLSTM((cell_list):ModuleList((0):ConvLSTMCell((conv):Conv2d(16,32,kernel_siz

In [10]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(validation_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [11]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    epoch_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        y_train = train_batch['y'].reshape((train_batch['y'].shape[0],-1)).to(device, non_blocking=True)
        mask = train_batch['mask'].sum(dim=0, dtype=torch.int8).reshape(-1).to(device, non_blocking=True) != 0
        mask = (mask.to(torch.int8) + train_mask) > 1
        landcover_batch = landcover.repeat(y_train.shape[0],1,1,1).to(device, non_blocking=True)
        
        if mask.sum() == 0:
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        y_pred, conv_hidden_states = model(train_batch['x_conv'].to(device), landcover_batch, hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((train_batch['y'].shape[0], -1))
        loss = loss_fn(y_pred[:,mask], y_train[:,mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_losses += loss.detach()

    epoch_loss = (epoch_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(epoch_loss))
    writer.add_scalar('loss_nse', epoch_loss, epoch)
    
    # eval on validation split
    model.eval()
    val_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, val_batch in enumerate(val_dataloader):
        y_val = val_batch['y'].reshape((val_batch['y'].shape[0],-1)).to(device, non_blocking=True)
        mask = val_batch['mask'].sum(dim=0, dtype=torch.int8).reshape(-1).to(device, non_blocking=True) != 0
        mask = (mask.to(torch.int8) + train_mask) > 0
        landcover_batch = landcover.repeat(y_val.shape[0],1,1,1).to(device, non_blocking=True)
        
        if not stateful_lstm:
            conv_hidden_states = None
        
        batch_pred, conv_hidden_states = model(val_batch['x_conv'].to(device), landcover_batch, hidden_state=conv_hidden_states)
        batch_pred = batch_pred.detach().reshape((val_batch['y'].shape[0], -1))
        val_losses += loss_fn(batch_pred[:,mask], y_val[:,mask]).detach()

    val_nse = (val_losses / len(val_dataloader)).item()
    print('Epoch {} mean val loss:  \t{}'.format(epoch, val_nse))
    writer.add_scalar('loss_nse_val', val_nse, epoch)
    if val_nse < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_nse, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_withLandcover_generalizationTest', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break

print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.pickle_model('ConvLSTM_withLandcover_generalizationTest', model, 'allStations', time_stamp)

Epoch 0 mean train loss:	1.3648072481155396
Epoch 0 mean val loss:  	1.5323761701583862
Saved model as ../pickle/models/ConvLSTM_withLandcover_generalizationTest_allStations_20190731-090931.pkl
Epoch 1 mean train loss:	1.1620817184448242
Epoch 1 mean val loss:  	1.1844403743743896
Saved model as ../pickle/models/ConvLSTM_withLandcover_generalizationTest_allStations_20190731-090931.pkl
Epoch 2 mean train loss:	1.068291425704956
Epoch 2 mean val loss:  	1.2329208850860596
Epoch 3 mean train loss:	1.0173444747924805
Epoch 3 mean val loss:  	0.9780217409133911
Saved model as ../pickle/models/ConvLSTM_withLandcover_generalizationTest_allStations_20190731-090931.pkl
Epoch 4 mean train loss:	0.9652582406997681
Epoch 4 mean val loss:  	1.025536060333252
Epoch 5 mean train loss:	0.9455944895744324
Epoch 5 mean val loss:  	1.0410311222076416
Epoch 6 mean train loss:	0.9528623819351196
Epoch 6 mean val loss:  	1.0676270723342896
Epoch 7 mean train loss:	0.9542893767356873
Epoch 7 mean val loss:  

In [12]:
logger.warning('predicting')
model.eval()

predictions = []
conv_hidden_states = None
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    landcover_batch = landcover.repeat(test_batch['y'].shape[0],1,1,1).to(device)
    pred, conv_hidden_states = model(test_batch['x_conv'].to(device), landcover_batch, hidden_state=conv_hidden_states)
    predictions.append(pred.detach())

predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-07-31 10:19:36,887 - 20190731-090931 - predicting


In [13]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict = {}
mse_dict = {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_station'] = False
for station in actuals['station'].unique():
    row, col = test_dataset.station_to_index[station]
    
    act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != act.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for station {}. Ignoring excess actuals.'.format(len(predictions), len(act), station))
        act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=act.index)
    pred['station'] = station
    pred['is_test_station'] = station in test_stations
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    
    nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
    nse_dict[station] = nse
    mse_dict[station] = mse
    
    print(station, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


02GA047 	NSE: 0.14538179220784064 	MSE: 66.99354238769207 (clipped to 0)
04213000 	NSE: 0.17851665345589385 	MSE: 147.43098890572773 (clipped to 0)
04176500 	NSE: 0.15381132879364123 	MSE: 578.0096634881256 (clipped to 0)
02GG003 	NSE: -0.020587674411260748 	MSE: 487.2261786489084 (clipped to 0)
04214500 	NSE: 0.4872924284679363 	MSE: 61.80027718352618 (clipped to 0)
02GC026 	NSE: 0.030852395716528647 	MSE: 155.87067759132623 (clipped to 0)
04174500 	NSE: 0.07156771997782185 	MSE: 87.80577410471317 (clipped to 0)
02GG013 	NSE: 0.2975532772825302 	MSE: 26.821087784488043 (clipped to 0)
04161820 	NSE: -1.2947991994661532 	MSE: 72.4652818209487 (clipped to 0)
04159492 	NSE: 0.26261747316735295 	MSE: 342.362137463192 (clipped to 0)
04200500 	NSE: 0.16541212856181275 	MSE: 835.1762190886783 (clipped to 0)
02GB001 	NSE: -0.7202120435768076 	MSE: 12918.577762394782 (clipped to 0)
04208504 	NSE: -0.9602191813672711 	MSE: 1914.2102029854382 (clipped to 0)
04199000 	NSE: 0.2681698740544377 	MSE:

In [14]:
nse_train = list(nse_dict[s] for s in train_stations)
mse_train = list(mse_dict[s] for s in train_stations)
print('Train Median NSE (clipped to 0)', np.median(nse_train), '/ Min', np.min(nse_train), '/ Max', np.max(nse_train))
print('Train Median MSE (clipped to 0)', np.median(mse_train), '/ Min', np.min(mse_train), '/ Max', np.max(mse_train))

nse_test = list(nse_dict[s] for s in test_stations)
mse_test = list(mse_dict[s] for s in test_stations)
print('Test Median NSE (clipped to 0)', np.median(nse_test), '/ Min', np.min(nse_test), '/ Max', np.max(nse_test))
print('Test Median MSE (clipped to 0)', np.median(mse_test), '/ Min', np.min(mse_test), '/ Max', np.max(mse_test))

nse_test = list(nse_dict[s] for s in exclude_downstream_stations)
mse_test = list(mse_dict[s] for s in exclude_downstream_stations)
print('Test Median NSE (clipped to 0)', np.median(nse_test), '/ Min', np.min(nse_test), '/ Max', np.max(nse_test))
print('Test Median MSE (clipped to 0)', np.median(mse_test), '/ Min', np.min(mse_test), '/ Max', np.max(mse_test))

writer.add_scalar('nse_median', np.median(nse_test))

Train Median NSE (clipped to 0) 0.2528775523110569 / Min -0.8294192738759327 / Max 0.5341412095823092
Train Median MSE (clipped to 0) 87.80577410471317 / Min 13.415694422254814 / Max 96521.68432864714
Test Median NSE (clipped to 0) 0.14941036122010853 / Min -1.2947991994661532 / Max 0.5443177934465421
Test Median MSE (clipped to 0) 139.954706644599 / Min 19.798008716009516 / Max 1455.486501018887
Test Median NSE (clipped to 0) -0.2134729014246779 / Min -0.9602191813672711 / Max 0.12582022832120787
Test Median MSE (clipped to 0) 382.4928457698387 / Min 31.815132829014512 / Max 12918.577762394782


In [15]:
print(list((s, nse_dict[s]) for s in nse_dict.keys()))

[('02GA047', 0.14538179220784064), ('04213000', 0.17851665345589385), ('04176500', 0.15381132879364123), ('02GG003', -0.020587674411260748), ('04214500', 0.4872924284679363), ('02GC026', 0.030852395716528647), ('04174500', 0.07156771997782185), ('02GG013', 0.2975532772825302), ('04161820', -1.2947991994661532), ('04159492', 0.26261747316735295), ('04200500', 0.16541212856181275), ('02GB001', -0.7202120435768076), ('04208504', -0.9602191813672711), ('04199000', 0.2681698740544377), ('02GG002', 0.2528775523110569), ('04193500', -0.3632861986178788), ('04207200', 0.15769432621413493), ('04160600', 0.5341412095823092), ('04215000', 0.35064038287049215), ('02GB007', -0.36141992816569957), ('02GC002', 0.2035596099575414), ('02GA038', 0.14941036122010853), ('02GA010', 0.05608367825768601), ('02GG009', 0.12582022832120787), ('04165500', -0.6576079579618836), ('04198000', -0.06552587468365623), ('04213500', -0.4207951678777593), ('02GA018', 0.12590347128794344), ('02GG006', 0.3234401867266312),

In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), actuals.rename({'runoff': 'actual'}, axis=1), 
                   on=['date', 'station'])[['date', 'station', 'prediction', 'actual', 'is_test_station']]
load_data.pickle_results('ConvLSTM_withLandcover_generalizationTest', save_df, time_stamp)

'ConvLSTM_withLandcover_generalizationTest_20190731-090931.pkl'

In [18]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190731-101953'