ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils
import torch.autograd as autograd
from torchvision import transforms
import torchvision.transforms.functional as TF
import random
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190819-105232'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-19 10:52:32,226 - 20190819-105232 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
landcover_nc = nc.Dataset('../data/NA_NALCMS_LC_30m_LAEA_mmu12_urb05_n40-45w75-90_erie.nc', 'r')
landcover_nc.set_auto_mask(False)
erie_lats = landcover_nc['lat'][:][::-1]
erie_lons = landcover_nc['lon'][:]
landcover_nc.close()
erie_lat_min, erie_lat_max, erie_lon_min, erie_lon_max = erie_lats.min(), erie_lats.max(), erie_lons.min(), erie_lons.max()
del erie_lats, erie_lons

out_lats, out_lons = load_data.load_dem_lats_lons()
out_lats = out_lats[(erie_lat_min <= out_lats) & (out_lats <= erie_lat_max)][::4].copy()
out_lons = out_lons[(erie_lon_min <= out_lons) &  (out_lons <= erie_lon_max)][::4].copy()

In [5]:
seq_len = 8
seq_steps = 1
stateful_lstm = False
val_start, val_end = None, None
validation_fraction = 0.005  # fraction of subbasins
p_random_transform = 0.0

if stateful_lstm:
    val_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    val_end = '2010-09-30'
    train_start = '2010-10-01'
    train_end = '2012-12-31'
else:
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [6]:
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = False
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg,  include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
landcover_types = []
geophysical_dataset = datasets.GeophysicalGridDataset(dem=True, landcover=False, soil=False, groundwater=False, min_lat=erie_lat_min, max_lat=erie_lat_max, min_lon=erie_lon_min, max_lon=erie_lon_max, landcover_types=landcover_types)
geophysical_data = next(geophysical_dataset.__iter__())[:,::4,::4].clone().detach()
geophysical_dataset.shape = geophysical_data.shape

In [8]:
subbasins = train_dataset.simulated_streamflow['subbasin'].unique()
np.random.seed(0)
test_subbasins = np.random.choice(subbasins, size=int(0.2 * len(subbasins)), replace=False)
val_subbasins = np.random.choice(list(s for s in subbasins if s not in test_subbasins), size=int(validation_fraction * len(subbasins)), replace=False)
train_subbasins = list(s for s in subbasins if s not in test_subbasins and s not in val_subbasins)
station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

train_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in train_subbasins)
val_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in val_subbasins)
test_subbasin_indices = list(test_dataset.outlet_to_row_col[s] for s in test_subbasins)

train_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
val_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
for row in range(train_mask.shape[0]):
    for col in range(train_mask.shape[1]):
        train_mask[row, col] = True if (row, col) in train_subbasin_indices else False
        val_mask[row, col] = True if (row, col) in val_subbasin_indices else False
train_mask = train_mask
val_mask = val_mask

In [9]:
# Train model
num_epochs = 600
learning_rate = 2e-3
patience = 200
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
batch_size = 4
num_convlstm_layers = 4
num_conv_layers = 1
convlstm_hidden_dims = [8,8,8,8]
conv_hidden_dims = []
convlstm_kernel_size = [(5,5)] * num_convlstm_layers
conv_kernel_size = [(5,5)] * num_conv_layers
conv_activation = nn.LeakyReLU
dropout = 0.2
weight_decay = 1e-5
feed_timesteps = seq_len

model = conv_lstm.ConvLSTMGridWithGeophysicalInput((geophysical_dataset.shape[1], geophysical_dataset.shape[2]), train_dataset.n_conv_vars + geophysical_dataset.shape[0], 
                                                   0, convlstm_hidden_dims, conv_hidden_dims, convlstm_kernel_size, 
                                                   conv_kernel_size, num_convlstm_layers, num_conv_layers, conv_activation, dropout=dropout, 
                                                   geophysical_size=None, feed_timesteps=feed_timesteps).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='ConvLSTM_simulationTraining')
param_description = {'time_stamp': time_stamp, 'H_convlstm': convlstm_hidden_dims, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_convlstm_layers': num_convlstm_layers, 'num_conv_layers': num_conv_layers, 'convlstm_kernel_size': convlstm_kernel_size, 'conv_kernel_size': conv_kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout, 'geophys_shape': geophysical_dataset.shape, 'conv_activation': conv_activation,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction, 'landcover_types': landcover_types,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end, 'feed_timesteps': feed_timesteps,
                     'train len': len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset), 'p_random_transform': p_random_transform}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190819-105232', 'H_convlstm': [8, 8, 8, 8], 'H_conv': [], 'batch_size': 4, 'num_convlstm_layers': 4, 'num_conv_layers': 1, 'convlstm_kernel_size': [(5, 5), (5, 5), (5, 5), (5, 5)], 'conv_kernel_size': [(5, 5)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 200, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.2, 'geophys_shape': torch.Size([1, 115, 212]), 'conv_activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'num_epochs': 600, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'validation_fraction': 0.005, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 15, 'model': 'ConvLSTMGridWithGeophysicalInput((conv_lstm):ConvLSTM((cell_list):ModuleList((0):ConvLSTMCell((conv):Conv2d(24,32

In [10]:
stride = geophysical_dataset.shape[1] // train_dataset.conv_height, geophysical_dataset.shape[2] // train_dataset.conv_width
upsample_kernel = (geophysical_dataset.shape[1] + stride[0] * (1 - train_dataset.conv_height), 
                   geophysical_dataset.shape[2] + stride[1] * (1 - train_dataset.conv_width))
upsample_model = nn.ConvTranspose2d(1, 1, upsample_kernel, stride=stride, bias=False)
upsample_model.weight = nn.Parameter(torch.ones(upsample_model.weight.shape))
upsample_model.eval()  # Model is just for upsampling, not for learning

ConvTranspose2d(1, 1, kernel_size=(16, 22), stride=(3, 5), bias=False)

In [11]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
    
geophysical_batch = geophysical_data.repeat(batch_size,seq_len,1,1,1)

In [12]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    val_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        geophysical_input = geophysical_batch[:train_batch['y_sim'].shape[0]]
        
        # Upsample x_conv to geophysical input size
        with torch.no_grad():
            x_conv = upsample_model(train_batch['x_conv'].reshape(-1,1,*train_batch['x_conv'].shape[-2:]))
        x_conv = x_conv.reshape(*train_batch['x_conv'].shape[0:3], *geophysical_input.shape[-2:])
        x_conv = torch.cat([geophysical_input, x_conv], dim=2)
        x_conv, geophysical_input, y_train, y_sim_means, train_mask_transformed, val_mask_transformed = \
            utils.random_transform(x_conv, geophysical_input, train_batch['y_sim'], train_dataset.y_sim_means, 
                                   train_mask, val_mask, rdrs_contains_month=include_month, border_masking=20, p=p_random_transform)
        y_train = y_train.reshape((y_train.shape[0],-1)).to(device, non_blocking=True)
        y_sim_means = y_sim_means.reshape(-1).to(device, non_blocking=True)
        train_mask_transformed = train_mask_transformed.reshape(-1).to(device, non_blocking=True)
        val_mask_transformed = val_mask_transformed.reshape(-1).to(device, non_blocking=True)
        
        if not train_mask_transformed.any():
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        dummy_geophysical_input = torch.zeros((x_conv.shape[0],0,*geophysical_input.shape[-2:]), device=device)
        y_pred, conv_hidden_states = model(x_conv.to(device), dummy_geophysical_input.to(device), hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((y_train.shape[0], -1))
        train_loss = loss_fn(y_pred[:,train_mask_transformed], y_train[:,train_mask_transformed], 
                             means=y_sim_means[train_mask_transformed])
        val_losses += loss_fn(y_pred[:,val_mask_transformed], y_train[:,val_mask_transformed], 
                              means=y_sim_means[val_mask_transformed]).detach()

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    val_loss = (val_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if train_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, train_loss, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_simulationTraining', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best train-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('ConvLSTM_simulationTraining', best_loss_model[0], model, optimizer, time_stamp)

Epoch 0 mean train loss:	1.495589017868042
Epoch 0 mean val loss:	1.9369527101516724
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190819-105232.pkl
Epoch 1 mean train loss:	1.5227603912353516
Epoch 1 mean val loss:	1.9703930616378784
Epoch 2 mean train loss:	1.4720699787139893
Epoch 2 mean val loss:	1.892807960510254
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190819-105232.pkl
Epoch 3 mean train loss:	1.464316964149475
Epoch 3 mean val loss:	1.8652057647705078
Epoch 4 mean train loss:	1.4470661878585815
Epoch 4 mean val loss:	2.3328425884246826
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190819-105232.pkl
Epoch 5 mean train loss:	1.4465067386627197
Epoch 5 mean val loss:	1.9701123237609863
Epoch 6 mean train loss:	1.4753999710083008
Epoch 6 mean val loss:	1.9661306142807007
Epoch 7 mean train loss:	1.4404399394989014


In [13]:
del train_mask_transformed, val_mask_transformed, y_train, y_pred, y_sim_means
if USE_CUDA:
    torch.cuda.empty_cache()

In [14]:
logger.warning('predicting')
model.eval()

predictions = []
conv_hidden_states = None
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    geophysical_input = geophysical_batch[:test_batch['y_sim'].shape[0]]
    with torch.no_grad():
        x_conv = upsample_model(test_batch['x_conv'].reshape(-1,1,*test_batch['x_conv'].shape[-2:]))
    x_conv = x_conv.reshape(*test_batch['x_conv'].shape[0:3], *geophysical_input.shape[-2:])
    x_conv = torch.cat([geophysical_input, x_conv], dim=2)
    dummy_geophysical_input = torch.zeros((x_conv.shape[0],0,*geophysical_input.shape[-2:]), device=device)
    pred, conv_hidden_states = model(x_conv.to(device), dummy_geophysical_input.to(device), hidden_state=conv_hidden_states)
    predictions.append(pred.detach().cpu())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-08-20 05:05:41,053 - 20190819-105232 - predicting


In [15]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
predictions_df['is_val_subbasin'] = False
for subbasin in test_dataset.simulated_streamflow['subbasin'].unique():
    row, col = test_dataset.outlet_to_row_col[subbasin]
    
    station = None
    subbasin_sim = test_dataset.simulated_streamflow[test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    pred['is_val_subbasin'] = subbasin in val_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else ('val' if subbasin in val_subbasins else 'train')
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1470192991291506 	MSE sim: 2483946.5975725134
2 	NSE sim: -0.03676537676560665 	MSE sim: 87.99803987056266
3 	NSE sim: 0.3123557658165468 	MSE sim: 21.882185826751652
4 	NSE sim: 0.2693041771618965 	MSE sim: 12.450702649693284
5 	NSE sim: 0.35611825124940444 	MSE sim: 15.617000114855314
6 	NSE sim: 0.3085547027639285 	MSE sim: 4.012349035885042
7 	NSE sim: 0.3768814597646052 	MSE sim: 1.7276710453700748
8 	NSE sim: 0.37541839221842066 	MSE sim: 4.754622668053476
9 	NSE sim: 0.4401227148504919 	MSE sim: 2.2726101457751384
10 	NSE sim: 0.435909204725761 	MSE sim: 3.215224598009104
11 	NSE sim: 0.33259180081579964 	MSE sim: 8.144477110493298
12 	NSE sim: 0.43483155695977893 	MSE sim: 1.6631750251576518
13 	NSE sim: 0.45715189017395563 	MSE sim: 2.0833483116236953
14 	NSE sim: 0.2624944011220631 	MSE sim: 197.8778997248644
15 	NSE sim: 0.29458879981969177 	MSE sim: 85.0208328837943
16 	NSE sim: 0.31832211957576173 	MSE sim: 9.973634709380782
17 	NSE sim: 0.2923535465440602 	

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 0.0314252548596051
500 	NSE sim: 0.2573219639724018 	MSE sim: 8.713323122674275
501 	NSE sim: -0.34802498005439686 	MSE sim: 0.05368970353425285
502 	NSE sim: 0.10194819188739235 	MSE sim: 0.09362418874795113
503 	NSE sim: 0.08830925903225406 	MSE sim: 0.024603828417397975
504 	NSE sim: -0.8704378969931932 	MSE sim: 0.012588344732610177
505 	NSE sim: 0.23241561736197613 	MSE sim: 2.182754011812553
506 	NSE sim: 0.21676046003798588 	MSE sim: 27.54197457099832
507 	NSE sim: -619.5107824033579 	MSE sim: 8.642911878095877
508 	NSE sim: 0.31831689788035955 	MSE sim: 1.7764879829766451
509 	NSE sim: -0.5670609478243549 	MSE sim: 90.17434723581209
510 	NSE sim: 0.02962624133006131 	MSE sim: 0.9385893463856653
511 	NSE sim: 0.3471330862301628 	MSE sim: 0.4755491271068799
512 	NSE sim: -0.12958160661411 	MSE sim: 264.7705173712262
513 	NSE sim: 0.2545986187881646 	MSE sim: 163.76296986068004
514 	NSE sim: 0.005581950673096192 	MSE sim: 0.9431304751703147
515 	NSE si

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


526 	NSE sim: -467465.81438694056 	MSE sim: 181.5984342635654
527 	NSE sim: -408.68571413439645 	MSE sim: 6.717468628908665
528 	NSE sim: 0.34599766909130136 	MSE sim: 0.04417304321340424
529 	NSE sim: -0.16168448585870676 	MSE sim: 0.011974255524525705
530 	NSE sim: 0.24943687764660705 	MSE sim: 3.961197755049562
531 	NSE sim: 0.33232278659351056 	MSE sim: 0.7980446638522729
532 	NSE sim: 0.20303889571533762 	MSE sim: 0.2619510488307196
533 	NSE sim: -391.30598875755 	MSE sim: 0.7706914373093244
534 	NSE sim: 0.1803144515160493 	MSE sim: 0.5011326185648886
535 	NSE sim: 0.07989010705104649 	MSE sim: 0.2859825255270021
536 	NSE sim: 0.31459088005553704 	MSE sim: 1.8361902924069473
537 	NSE sim: 0.19267380338756024 	MSE sim: 1.3038114150911149
538 	NSE sim: 0.26517030075701453 	MSE sim: 125.79129071994348
539 	NSE sim: 0.13367879022108642 	MSE sim: 0.016836951130446785
540 	NSE sim: -0.020877702508106166 	MSE sim: 5323.287724316837
541 	NSE sim: 0.20999451170036865 	MSE sim: 426.6397046

In [16]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(name, 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

print_nse_mse('Train sim', nse_sim_dict, mse_sim_dict, train_subbasins)
print_nse_mse('Val sim', nse_sim_dict, mse_sim_dict, val_subbasins)
nse_median_sim_test = print_nse_mse('Test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_stations_train_val = print_nse_mse('Stations (Train/Val)', nse_dict, mse_dict, list(s for s in station_subbasins if s not in test_subbasins))
nse_median_stations_test = print_nse_mse('Stations (Test)', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))
nse_median_stations = print_nse_mse('Stations (Train/Val/Test)', nse_dict, mse_dict, station_subbasins)

writer.add_scalar('nse_median_sim', nse_median_sim_test)
writer.add_scalar('nse_median_stations_test', nse_median_stations_test)
writer.add_scalar('nse_median_stations_all', nse_median_stations)

Train sim Median NSE (clipped to 0) 0.20199649786882068 / Min -467465.81438694056 / Max 0.5165316167268138
Train sim Median MSE (clipped to 0) 13.576014918480793 / Min 0.004538181499609239 / Max 2483946.5975725134
Val sim Median NSE (clipped to 0) 0.010542079040518737 / Min -0.41105442431196404 / Max 0.265598741898016
Val sim Median MSE (clipped to 0) 50.716731277427655 / Min 4.85930854167773 / Max 507.3391196248983
Test sim Median NSE (clipped to 0) 0.20199649786882068 / Min -467465.81438694056 / Max 0.5165316167268138
Test sim Median MSE (clipped to 0) 13.576014918480793 / Min 0.004538181499609239 / Max 2483946.5975725134
Stations (Train/Val) Median NSE (clipped to 0) 0.03862838412413405 / Min -1.8136374460794742 / Max 0.37629271414686327
Stations (Train/Val) Median MSE (clipped to 0) 162.5016499921704 / Min 18.400618299776596 / Max 100022.58776407332
Stations (Test) Median NSE (clipped to 0) -0.10973035691307176 / Min -0.19526031612428274 / Max 0.20180297565450456
Stations (Test) Me

In [17]:
nse_dict

{676: 0.028471804994045113,
 677: -0.1368931535943232,
 678: 0.17046627723120356,
 680: 0.37629271414686327,
 681: 0.3639807792117975,
 682: 0.04105345755352807,
 683: 0.20180297565450456,
 684: 0.1276294322603393,
 685: 0.001765934823781845,
 686: 0.19812722437358865,
 687: -0.30597277788536115,
 688: -0.14435358077979776,
 689: -1.8136374460794742,
 690: -0.27279356169020885,
 691: -0.14712662160713452,
 692: 0.1691177285949812,
 693: 0.0424742551745253,
 694: -0.06843815259412755,
 695: 0.015745670392033673,
 696: -0.41663273948266255,
 697: -0.024968570664277934,
 698: -0.19526031612428274,
 699: 0.08768652361209861,
 700: 0.08085013259720886,
 701: 0.15246879518369993,
 702: -0.02554545414369014,
 703: -0.13925514475061407,
 704: 0.066639643492166,
 705: 0.10743907897631355,
 706: 0.09007112509691584,
 707: -0.32766423771892006,
 709: -0.08872076072267499,
 710: 0.10467176528734867,
 712: 0.06951577546178422,
 713: -0.4127334639594131,
 714: 0.0815367996753037,
 715: -0.0552363205

In [18]:
writer.close()

In [19]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   test_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin', 'is_val_subbasin']]
load_data.pickle_results('ConvLSTM_simulationTraining', save_df, time_stamp)

'ConvLSTM_simulationTraining_20190819-105232.pkl'

In [20]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[1, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 42, 43, 44, 45, 46, 47, 48, 52, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 70, 71, 72, 73, 74, 75, 78, 79, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 105, 106, 107, 108, 109, 110, 111, 112, 113, 115, 116, 117, 118, 120, 121, 122, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 170, 172, 174, 175, 177, 178, 179, 180, 181, 183, 184, 185, 186, 187, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 207, 208, 209, 210, 211, 212, 213, 214, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 231, 233, 234, 235, 237, 238, 239, 241, 242, 243, 245, 246, 247, 249, 250, 251, 254, 255, 257, 260, 261, 262, 263, 265, 266, 270, 

In [21]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[683, 688, 697, 698, 700, 709, 716, 717, 718, 720]

In [22]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190820-050843'