ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils
import torch.autograd as autograd
from torchvision import transforms
import torchvision.transforms.functional as TF
import random
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190814-220901'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-14 22:09:01,850 - 20190814-220901 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
landcover_nc = nc.Dataset('../data/NA_NALCMS_LC_30m_LAEA_mmu12_urb05_n40-45w75-90_erie.nc', 'r')
landcover_nc.set_auto_mask(False)
erie_lats = landcover_nc['lat'][:][::-1]
erie_lons = landcover_nc['lon'][:]
landcover_nc.close()
erie_lat_min, erie_lat_max, erie_lon_min, erie_lon_max = erie_lats.min(), erie_lats.max(), erie_lons.min(), erie_lons.max()
del erie_lats, erie_lons

out_lats, out_lons = load_data.load_dem_lats_lons()
out_lats = out_lats[(erie_lat_min <= out_lats) & (out_lats <= erie_lat_max)].copy()
out_lons = out_lons[(erie_lon_min <= out_lons) &  (out_lons <= erie_lon_max)].copy()

In [5]:
seq_len = 8
seq_steps = 1
stateful_lstm = False
validation_fraction, val_start, val_end = None, None, None
p_random_transform = 0.5

if stateful_lstm:
    val_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    val_end = '2010-09-30'
    train_start = '2010-10-01'
    train_end = '2012-12-31'
else:
    validation_fraction = 0.1
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [6]:
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = False
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg,  include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
landcover_types = []
geophysical_dataset = datasets.GeophysicalGridDataset(dem=True, landcover=False, soil=False, groundwater=False, min_lat=erie_lat_min, max_lat=erie_lat_max, min_lon=erie_lon_min, max_lon=erie_lon_max, landcover_types=landcover_types)
geophysical_data = next(geophysical_dataset.__iter__())

In [8]:
subbasins = train_dataset.simulated_streamflow['subbasin'].unique()
np.random.seed(0)
test_subbasins = np.random.choice(subbasins, size=int(0.2 * len(subbasins)), replace=False)
val_subbasins = np.random.choice(list(s for s in subbasins if s not in test_subbasins), size=int(validation_fraction * len(subbasins)), replace=False)
train_subbasins = list(s for s in subbasins if s not in test_subbasins and s not in val_subbasins)
station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

train_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in train_subbasins)
val_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in val_subbasins)
test_subbasin_indices = list(test_dataset.outlet_to_row_col[s] for s in test_subbasins)

train_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
val_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
for row in range(train_mask.shape[0]):
    for col in range(train_mask.shape[1]):
        train_mask[row, col] = True if (row, col) in train_subbasin_indices else False
        val_mask[row, col] = True if (row, col) in val_subbasin_indices else False
train_mask = train_mask
val_mask = val_mask

In [9]:
# Train model
num_epochs = 150
learning_rate = 2e-3
patience = 100
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
batch_size = 2
num_convlstm_layers = 5
num_conv_layers = 8
convlstm_hidden_dims = [8,8,8,8,8]
conv_hidden_dims = [64,32,16,16,8,8,4]
convlstm_kernel_size = [(5,5)] * num_convlstm_layers
conv_kernel_size = [(5,5)] * num_conv_layers
conv_activation = nn.LeakyReLU
dropout = 0.3
weight_decay = 1e-5
feed_timesteps = seq_len

model = conv_lstm.ConvLSTMGridWithGeophysicalInput((train_dataset.conv_height, train_dataset.conv_width), train_dataset.n_conv_vars, 
                                                   geophysical_dataset.shape[0], convlstm_hidden_dims, conv_hidden_dims, convlstm_kernel_size, 
                                                   conv_kernel_size, num_convlstm_layers, num_conv_layers, conv_activation, dropout=dropout, 
                                                   geophysical_size=geophysical_dataset.shape[1:], feed_timesteps=feed_timesteps).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='ConvLSTM_simulationTraining')
param_description = {'time_stamp': time_stamp, 'H_convlstm': convlstm_hidden_dims, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_convlstm_layers': num_convlstm_layers, 'num_conv_layers': num_conv_layers, 'convlstm_kernel_size': convlstm_kernel_size, 'conv_kernel_size': conv_kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout, 'geophys_shape': geophysical_dataset.shape, 'conv_activation': conv_activation,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction, 'landcover_types': landcover_types,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end, 'feed_timesteps': feed_timesteps,
                     'train len': len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset), 'p_random_transform': p_random_transform}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190814-220901', 'H_convlstm': [8, 8], 'H_conv': [8, 8, 8, 8, 8], 'batch_size': 4, 'num_convlstm_layers': 2, 'num_conv_layers': 6, 'convlstm_kernel_size': [(5, 5), (5, 5)], 'conv_kernel_size': [(5, 5), (5, 5), (5, 5), (5, 5), (5, 5), (5, 5)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-06\n), 'lr': 0.002, 'patience': 200, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.05, 'geophys_shape': torch.Size([1, 460, 848]), 'conv_activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'num_epochs': 400, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-06, 'validation_fraction': 0.1, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 15, 'model': 'ConvLSTMGridWithGeophysicalInput((conv_lstm):ConvLSTM((cell_list):ModuleList((0):Co

In [10]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
    
geophysical_batch = geophysical_data.repeat(batch_size,1,1,1)

In [11]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    val_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        geophysical_input = geophysical_batch[:train_batch['y_sim'].shape[0]]
        x_conv, geophysical_input, y_train, y_sim_means, train_mask_transformed, val_mask_transformed = \
            utils.random_transform(train_batch['x_conv'], geophysical_input, train_batch['y_sim'], train_dataset.y_sim_means, 
                                   train_mask, val_mask, rdrs_contains_month=include_month, border_masking=20, p=p_random_transform)
        y_train = y_train.reshape((y_train.shape[0],-1)).to(device, non_blocking=True)
        y_sim_means = y_sim_means.reshape(-1).to(device, non_blocking=True)
        train_mask_transformed = train_mask_transformed.reshape(-1).to(device, non_blocking=True)
        val_mask_transformed = val_mask_transformed.reshape(-1).to(device, non_blocking=True)
        
        if not train_mask_transformed.any():
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        y_pred, conv_hidden_states = model(x_conv.to(device), geophysical_input.to(device), hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((y_train.shape[0], -1))
        train_loss = loss_fn(y_pred[:,train_mask_transformed], y_train[:,train_mask_transformed], 
                             means=y_sim_means[train_mask_transformed])
        val_losses += loss_fn(y_pred[:,val_mask_transformed], y_train[:,val_mask_transformed], 
                              means=y_sim_means[val_mask_transformed]).detach()

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    val_loss = (val_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if train_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, train_loss, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_simulationTraining', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best train-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('ConvLSTM_simulationTraining', best_loss_model[0], model, optimizer, time_stamp)

Epoch 0 mean train loss:	2.2597007751464844
Epoch 0 mean val loss:	1.764514446258545
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-220901.pkl
Epoch 1 mean train loss:	1.7105542421340942
Epoch 1 mean val loss:	1.6219234466552734
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-220901.pkl
Epoch 2 mean train loss:	1.5593063831329346
Epoch 2 mean val loss:	1.5468872785568237
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-220901.pkl
Epoch 3 mean train loss:	1.581915020942688
Epoch 3 mean val loss:	1.5398499965667725
Epoch 4 mean train loss:	1.5550776720046997
Epoch 4 mean val loss:	1.5541765689849854
Epoch 5 mean train loss:	1.5443331003189087
Epoch 5 mean val loss:	1.5245566368103027
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-220901.pkl
Epoch 6 mean train loss:	1.5562074184417725
Epoch 6 mean val loss:	1.554924726486206
Epoch 7 mean train loss:	1.530774

In [12]:
del train_mask_transformed, val_mask_transformed, y_train, y_pred, y_sim_means
if USE_CUDA:
    torch.cuda.empty_cache()

In [13]:
logger.warning('predicting')
model.eval()

predictions = []
conv_hidden_states = None
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    geophysical_input = geophysical_batch[:test_batch['y_sim'].shape[0]]
    pred, conv_hidden_states = model(test_batch['x_conv'].to(device), geophysical_input.to(device), hidden_state=conv_hidden_states)
    predictions.append(pred.detach().cpu())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-08-15 04:25:18,355 - 20190814-220901 - predicting


In [14]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
predictions_df['is_val_subbasin'] = False
for subbasin in test_dataset.simulated_streamflow['subbasin'].unique():
    row, col = test_dataset.outlet_to_row_col[subbasin]
    
    station = None
    subbasin_sim = test_dataset.simulated_streamflow[test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    pred['is_val_subbasin'] = subbasin in val_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1783414665338374 	MSE sim: 2520184.087931834
2 	NSE sim: -0.5010817652394131 	MSE sim: 127.40804813389913
3 	NSE sim: -0.2566246879113472 	MSE sim: 39.98825783511758
4 	NSE sim: 0.1725208715814962 	MSE sim: 14.099843265490884
5 	NSE sim: 0.13246704936395448 	MSE sim: 21.04153785382686
6 	NSE sim: 0.1740360291644889 	MSE sim: 4.792939883031943
7 	NSE sim: 0.422467321425404 	MSE sim: 1.6012787649544482
8 	NSE sim: 0.39951812135403963 	MSE sim: 4.571163666035866
9 	NSE sim: 0.029503254057743677 	MSE sim: 3.939364588940524
10 	NSE sim: -0.05662081160985788 	MSE sim: 6.022564545845385
11 	NSE sim: -0.7900311705619085 	MSE sim: 21.84400478377599
12 	NSE sim: -0.16264465086411328 	MSE sim: 3.421425187946521
13 	NSE sim: 0.31403632582323826 	MSE sim: 2.6325987630118095
14 	NSE sim: -0.8483789222378137 	MSE sim: 495.93296591184543
15 	NSE sim: -0.9350845358707724 	MSE sim: 233.2292128310841
16 	NSE sim: -0.5018619172238596 	MSE sim: 21.973754138830127
17 	NSE sim: -0.991428264719

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 0.04004334346119331
500 	NSE sim: -0.8206241700472188 	MSE sim: 21.360112873975705
501 	NSE sim: -0.07858119106488748 	MSE sim: 0.04295818344817197
502 	NSE sim: -0.08741214247564577 	MSE sim: 0.11336548599341828
503 	NSE sim: 0.1458121440374509 	MSE sim: 0.023051996142923574
504 	NSE sim: -0.21595761568438254 	MSE sim: 0.008183588276886497
505 	NSE sim: 0.004097276742064171 	MSE sim: 2.8320152334201776
506 	NSE sim: -0.085764398628559 	MSE sim: 38.18001253942485
507 	NSE sim: -8.943619830347728 	MSE sim: 0.13850175110594018
508 	NSE sim: 0.3473276073730258 	MSE sim: 1.7008851454835385
509 	NSE sim: -0.4881652724879233 	MSE sim: 85.63440510205618
510 	NSE sim: -0.3849197184241586 	MSE sim: 1.3395569302018673
511 	NSE sim: 0.11551666348089495 	MSE sim: 0.644258837062963
512 	NSE sim: -0.5238007868576542 	MSE sim: 357.1743027193366
513 	NSE sim: -0.5160562653901204 	MSE sim: 333.0740762682066
514 	NSE sim: -0.17469706456969836 	MSE sim: 1.114111516216643
515 

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: nan 	MSE sim: 0.0
526 	NSE sim: -0.6695168114341825 	MSE sim: 0.0006485629131358882
527 	NSE sim: -121.27975929816179 	MSE sim: 2.004977031653202
528 	NSE sim: 0.14629281118202409 	MSE sim: 0.057661636298533145
529 	NSE sim: 0.2072792653755543 	MSE sim: 0.008171100459318148
530 	NSE sim: 0.2908060023566157 	MSE sim: 3.7428666393189722
531 	NSE sim: -0.612200165521984 	MSE sim: 1.9269906375751498
532 	NSE sim: -0.6124474513208256 	MSE sim: 0.5299911109677474
533 	NSE sim: -0.4602828412085851 	MSE sim: 0.002868749175442021
534 	NSE sim: 0.1876258757044882 	MSE sim: 0.4966626200931968
535 	NSE sim: -0.6402473835565123 	MSE sim: 0.5098109397945282
536 	NSE sim: 0.17866187400576383 	MSE sim: 2.200339986512772
537 	NSE sim: -0.6729304778269833 	MSE sim: 2.701740464755092
538 	NSE sim: -0.6455155990431545 	MSE sim: 281.68639797313597
539 	NSE sim: -0.1813508091738456 	MSE sim: 0.022959550819550922
540 	NSE sim: -0.5435899062715306 	MSE sim: 8048.930032703275
541 	NSE sim: -0.308

In [15]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(name, 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

print_nse_mse('Train sim', nse_sim_dict, mse_sim_dict, train_subbasins)
print_nse_mse('Val sim', nse_sim_dict, mse_sim_dict, val_subbasins)
nse_median_sim_test = print_nse_mse('Test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_stations_train_val = print_nse_mse('Stations (Train/Val)', nse_dict, mse_dict, list(s for s in station_subbasins if s not in test_subbasins))
nse_median_stations_test = print_nse_mse('Stations (Test)', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))
nse_median_stations = print_nse_mse('Stations (Train/Val/Test)', nse_dict, mse_dict, station_subbasins)

writer.add_scalar('nse_median_sim', nse_median_sim_test)
writer.add_scalar('nse_median_stations_test', nse_median_stations_test)
writer.add_scalar('nse_median_stations_all', nse_median_stations)

Train sim Median NSE (clipped to 0) -0.17739779675046308 / Min -1.3649223994508612 / Max 0.4491255807281165
Train sim Median MSE (clipped to 0) 14.340357163615057 / Min 0.0006485629131358882 / Max 2526768.573342948
Val sim Median NSE (clipped to 0) -0.437275138186618 / Min -75.09981163321675 / Max 0.422467321425404
Val sim Median MSE (clipped to 0) 23.903211274699554 / Min 0.008171100459318148 / Max 51827.653483272494
Test sim Median NSE (clipped to 0) -0.17739779675046308 / Min -1.3649223994508612 / Max 0.4491255807281165
Test sim Median MSE (clipped to 0) 14.340357163615057 / Min 0.0006485629131358882 / Max 2526768.573342948
Stations (Train/Val) Median NSE (clipped to 0) -0.22396789832473707 / Min -2.246413473370865 / Max 0.12428329471805744
Stations (Train/Val) Median MSE (clipped to 0) 213.04389784436188 / Min 7.290410555001457 / Max 99707.47002572678
Stations (Test) Median NSE (clipped to 0) -0.09427711808766792 / Min -0.500596202892257 / Max 0.06812034060202476
Stations (Test) Me

In [16]:
nse_dict

{676: -0.22511827563509712,
 677: -1.404001104675356,
 678: -0.09931289657110254,
 680: 0.06637802600765041,
 681: 0.12428329471805744,
 682: 0.04812159027183005,
 683: -0.2781443303006914,
 684: -0.3141522160413541,
 685: -0.458868308809659,
 686: -0.01515395963882038,
 687: -0.9496796414209034,
 688: -0.08960454856146405,
 689: 0.005849629247400734,
 690: -0.17303145701261147,
 691: -1.2797590898435884,
 692: -0.7445544515868889,
 693: -0.6931523334095402,
 694: -0.047902173931401615,
 695: 0.0975603377667541,
 696: -0.3409686582435445,
 697: 0.06812034060202476,
 698: -0.19712733962114504,
 699: -0.008476880188982738,
 700: 0.028327838101861125,
 701: -0.07128906355117204,
 702: -0.39591420525780885,
 703: -1.2809516645865764,
 704: -0.3160168428238028,
 705: -0.04997335715530338,
 706: -0.029862232039069347,
 707: -2.246413473370865,
 709: -0.46846388404891726,
 710: -0.2783535087793463,
 712: 0.058738378054539186,
 713: -0.4082826955479879,
 714: -0.15477776991167147,
 715: -0.222

In [17]:
writer.close()

In [18]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   train_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin', 'is_val_subbasin']]
load_data.pickle_results('ConvLSTM_simulationTraining', save_df, time_stamp)

'ConvLSTM_simulationTraining_20190814-220901.pkl'

In [19]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[1, 3, 4, 5, 6, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 42, 43, 45, 46, 47, 48, 52, 54, 55, 57, 58, 59, 62, 63, 64, 66, 67, 70, 73, 74, 75, 78, 79, 81, 82, 84, 85, 87, 88, 89, 91, 92, 94, 95, 96, 98, 99, 100, 101, 102, 103, 105, 108, 109, 110, 111, 112, 113, 115, 116, 117, 118, 120, 121, 122, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 144, 145, 147, 148, 149, 150, 152, 153, 154, 155, 156, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 174, 175, 177, 178, 179, 180, 181, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 208, 209, 211, 212, 213, 214, 217, 218, 219, 220, 221, 223, 224, 225, 226, 227, 228, 229, 231, 233, 234, 235, 237, 238, 239, 241, 242, 243, 246, 249, 250, 251, 254, 255, 257, 260, 261, 262, 263, 265, 266, 270, 271, 273, 274, 276, 277, 278, 279, 280, 281, 282, 283, 285, 286, 288, 289, 290, 291, 292, 293, 295, 296, 297, 298, 

In [20]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[683, 688, 697, 698, 700, 709, 716, 717, 718, 720]

In [21]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190815-042742'