ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils
import torch.autograd as autograd
from torchvision import transforms
import torchvision.transforms.functional as TF
import random
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190818-143934'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-18 14:39:34,861 - 20190818-143934 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
landcover_nc = nc.Dataset('../data/NA_NALCMS_LC_30m_LAEA_mmu12_urb05_n40-45w75-90_erie.nc', 'r')
landcover_nc.set_auto_mask(False)
erie_lats = landcover_nc['lat'][:][::-1]
erie_lons = landcover_nc['lon'][:]
landcover_nc.close()
erie_lat_min, erie_lat_max, erie_lon_min, erie_lon_max = erie_lats.min(), erie_lats.max(), erie_lons.min(), erie_lons.max()
del erie_lats, erie_lons

out_lats, out_lons = load_data.load_dem_lats_lons()
out_lats = out_lats[(erie_lat_min <= out_lats) & (out_lats <= erie_lat_max)][::4].copy()
out_lons = out_lons[(erie_lon_min <= out_lons) &  (out_lons <= erie_lon_max)][::4].copy()

In [5]:
seq_len = 8
seq_steps = 1
stateful_lstm = False
val_start, val_end = None, None
validation_fraction = 0.1  # fraction of subbasins
p_random_transform = 0.0

if stateful_lstm:
    val_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    val_end = '2010-09-30'
    train_start = '2010-10-01'
    train_end = '2012-12-31'
else:
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [6]:
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg,  include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
landcover_types = []
geophysical_dataset = datasets.GeophysicalGridDataset(dem=True, landcover=False, soil=False, groundwater=False, min_lat=erie_lat_min, max_lat=erie_lat_max, min_lon=erie_lon_min, max_lon=erie_lon_max, landcover_types=landcover_types)
geophysical_data = next(geophysical_dataset.__iter__())[:,::4,::4].clone().detach()
geophysical_dataset.shape = geophysical_data.shape

In [8]:
subbasins = train_dataset.simulated_streamflow['subbasin'].unique()
np.random.seed(0)
test_subbasins = np.random.choice(subbasins, size=int(0.2 * len(subbasins)), replace=False)
val_subbasins = np.random.choice(list(s for s in subbasins if s not in test_subbasins), size=int(validation_fraction * len(subbasins)), replace=False)
train_subbasins = list(s for s in subbasins if s not in test_subbasins and s not in val_subbasins)
station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

train_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in train_subbasins)
val_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in val_subbasins)
test_subbasin_indices = list(test_dataset.outlet_to_row_col[s] for s in test_subbasins)

train_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
val_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
for row in range(train_mask.shape[0]):
    for col in range(train_mask.shape[1]):
        train_mask[row, col] = True if (row, col) in train_subbasin_indices else False
        val_mask[row, col] = True if (row, col) in val_subbasin_indices else False
train_mask = train_mask
val_mask = val_mask

In [9]:
# Train model
num_epochs = 200
learning_rate = 2e-3
patience = 100
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
batch_size = 4
num_convlstm_layers = 3
num_conv_layers = 1
convlstm_hidden_dims = [8,8,8]
conv_hidden_dims = []
convlstm_kernel_size = [(5,5)] * num_convlstm_layers
conv_kernel_size = [(5,5)] * num_conv_layers
conv_activation = nn.LeakyReLU
dropout = 0.2
weight_decay = 1e-5
feed_timesteps = 8

model = conv_lstm.ConvLSTMGridWithGeophysicalInput((geophysical_dataset.shape[1], geophysical_dataset.shape[2]), train_dataset.n_conv_vars + geophysical_dataset.shape[0], 
                                                   0, convlstm_hidden_dims, conv_hidden_dims, convlstm_kernel_size, 
                                                   conv_kernel_size, num_convlstm_layers, num_conv_layers, conv_activation, dropout=dropout, 
                                                   geophysical_size=None, feed_timesteps=feed_timesteps).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='ConvLSTM_simulationTraining')
param_description = {'time_stamp': time_stamp, 'H_convlstm': convlstm_hidden_dims, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_convlstm_layers': num_convlstm_layers, 'num_conv_layers': num_conv_layers, 'convlstm_kernel_size': convlstm_kernel_size, 'conv_kernel_size': conv_kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout, 'geophys_shape': geophysical_dataset.shape, 'conv_activation': conv_activation,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction, 'landcover_types': landcover_types,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end, 'feed_timesteps': feed_timesteps,
                     'train len': len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset), 'p_random_transform': p_random_transform}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190818-143934', 'H_convlstm': [8, 8, 8], 'H_conv': [], 'batch_size': 4, 'num_convlstm_layers': 3, 'num_conv_layers': 1, 'convlstm_kernel_size': [(5, 5), (5, 5), (5, 5)], 'conv_kernel_size': [(5, 5)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 100, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.2, 'geophys_shape': torch.Size([1, 115, 212]), 'conv_activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'num_epochs': 200, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'validation_fraction': 0.1, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 15, 'model': 'ConvLSTMGridWithGeophysicalInput((conv_lstm):ConvLSTM((cell_list):ModuleList((0):ConvLSTMCell((conv):Conv2d(24,32,kernel_size=

In [10]:
stride = geophysical_dataset.shape[1] // train_dataset.conv_height, geophysical_dataset.shape[2] // train_dataset.conv_width
upsample_kernel = (geophysical_dataset.shape[1] + stride[0] * (1 - train_dataset.conv_height), 
                   geophysical_dataset.shape[2] + stride[1] * (1 - train_dataset.conv_width))
upsample_model = nn.ConvTranspose2d(train_dataset.n_conv_vars, train_dataset.n_conv_vars, upsample_kernel, stride=stride, bias=False)
upsample_model.weight = nn.Parameter(torch.ones(upsample_model.weight.shape))
upsample_model.eval()  # Model is just for upsampling, not for learning

ConvTranspose2d(15, 15, kernel_size=(16, 22), stride=(3, 5), bias=False)

In [11]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
    
geophysical_batch = geophysical_data.repeat(batch_size,seq_len,1,1,1)

In [12]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    val_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        geophysical_input = geophysical_batch[:train_batch['y_sim'].shape[0]]
        
        # Upsample x_conv to geophysical input size
        with torch.no_grad():
            x_conv = upsample_model(train_batch['x_conv'].reshape(-1,*train_batch['x_conv'].shape[2:]))
        x_conv = x_conv.reshape(*train_batch['x_conv'].shape[0:3], *geophysical_input.shape[-2:])
        x_conv = torch.cat([geophysical_input, x_conv], dim=2)
        x_conv, geophysical_input, y_train, y_sim_means, train_mask_transformed, val_mask_transformed = \
            utils.random_transform(x_conv, geophysical_input, train_batch['y_sim'], train_dataset.y_sim_means, 
                                   train_mask, val_mask, rdrs_contains_month=include_month, border_masking=20, p=p_random_transform)
        y_train = y_train.reshape((y_train.shape[0],-1)).to(device, non_blocking=True)
        y_sim_means = y_sim_means.reshape(-1).to(device, non_blocking=True)
        train_mask_transformed = train_mask_transformed.reshape(-1).to(device, non_blocking=True)
        val_mask_transformed = val_mask_transformed.reshape(-1).to(device, non_blocking=True)
        
        if not train_mask_transformed.any():
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        dummy_geophysical_input = torch.zeros((x_conv.shape[0],0,*geophysical_input.shape[-2:]), device=device)
        y_pred, conv_hidden_states = model(x_conv.to(device), dummy_geophysical_input.to(device), hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((y_train.shape[0], -1))
        train_loss = loss_fn(y_pred[:,train_mask_transformed], y_train[:,train_mask_transformed], 
                             means=y_sim_means[train_mask_transformed])
        val_losses += loss_fn(y_pred[:,val_mask_transformed], y_train[:,val_mask_transformed], 
                              means=y_sim_means[val_mask_transformed]).detach()

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    val_loss = (val_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if train_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, train_loss, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_simulationTraining', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best train-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('ConvLSTM_simulationTraining', best_loss_model[0], model, optimizer, time_stamp)

Epoch 0 mean train loss:	1.4888505935668945
Epoch 0 mean val loss:	1.5237016677856445
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190818-143934.pkl
Epoch 1 mean train loss:	1.5186420679092407
Epoch 1 mean val loss:	1.5688539743423462
Epoch 2 mean train loss:	1.4349104166030884
Epoch 2 mean val loss:	1.5424152612686157
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190818-143934.pkl
Epoch 3 mean train loss:	1.4310365915298462
Epoch 3 mean val loss:	1.4939792156219482
Epoch 4 mean train loss:	1.3984565734863281
Epoch 4 mean val loss:	1.4852526187896729
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190818-143934.pkl
Epoch 5 mean train loss:	1.3882395029067993
Epoch 5 mean val loss:	1.4359710216522217
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190818-143934.pkl
Epoch 6 

In [13]:
del train_mask_transformed, val_mask_transformed, y_train, y_pred, y_sim_means
if USE_CUDA:
    torch.cuda.empty_cache()

In [14]:
logger.warning('predicting')
model.eval()

predictions = []
conv_hidden_states = None
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    geophysical_input = geophysical_batch[:test_batch['y_sim'].shape[0]]
    with torch.no_grad():
        x_conv = upsample_model(test_batch['x_conv'].reshape(-1,*test_batch['x_conv'].shape[2:]))
    x_conv = x_conv.reshape(*test_batch['x_conv'].shape[0:3], *geophysical_input.shape[-2:])
    x_conv = torch.cat([geophysical_input, x_conv], dim=2)
    dummy_geophysical_input = torch.zeros((x_conv.shape[0],0,*geophysical_input.shape[-2:]), device=device)
    pred, conv_hidden_states = model(x_conv.to(device), dummy_geophysical_input.to(device), hidden_state=conv_hidden_states)
    predictions.append(pred.detach().cpu())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-08-18 23:05:33,745 - 20190818-143934 - predicting


In [15]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
predictions_df['is_val_subbasin'] = False
for subbasin in test_dataset.simulated_streamflow['subbasin'].unique():
    row, col = test_dataset.outlet_to_row_col[subbasin]
    
    station = None
    subbasin_sim = test_dataset.simulated_streamflow[test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    pred['is_val_subbasin'] = subbasin in val_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else ('val' if subbasin in val_subbasins else 'train')
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.17488393700493 	MSE sim: 2516183.975444341
2 	NSE sim: -0.5031860573046527 	MSE sim: 127.58665515647732
3 	NSE sim: -0.2531850828920925 	MSE sim: 39.87880286922038
4 	NSE sim: -0.29384536793653604 	MSE sim: 22.046497937117756
5 	NSE sim: -0.20305761626152985 	MSE sim: 29.1795053482893
6 	NSE sim: -0.2937880237316961 	MSE sim: 7.507649774190513
7 	NSE sim: -0.0031338092708712573 	MSE sim: 2.7813090527756814
8 	NSE sim: -0.6616918165607615 	MSE sim: 12.649616126867592
9 	NSE sim: -0.1562990756021767 	MSE sim: 4.693558893110293
10 	NSE sim: -0.019990093582846535 	MSE sim: 5.8137754880733565
11 	NSE sim: 0.027191620100406655 	MSE sim: 11.87131892097359
12 	NSE sim: -0.09327930461877698 	MSE sim: 3.2172971745952053
13 	NSE sim: -0.20738439806694875 	MSE sim: 4.63371282254189
14 	NSE sim: -0.6021396513479016 	MSE sim: 429.8652508631598
15 	NSE sim: -0.5407018804718897 	MSE sim: 185.69560147310634
16 	NSE sim: -0.4606107990218753 	MSE sim: 21.370208687063325
17 	NSE sim: -0.221

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 0.12076688948336842
500 	NSE sim: -0.4883746918563072 	MSE sim: 17.462061604946722
501 	NSE sim: -0.20142597228755887 	MSE sim: 0.04785089684900912
502 	NSE sim: -0.13794111999003222 	MSE sim: 0.11863326061990698
503 	NSE sim: -0.033789149861037426 	MSE sim: 0.02789890224831025
504 	NSE sim: -0.32032702161422866 	MSE sim: 0.008886010989500844
505 	NSE sim: -0.3236888086977192 	MSE sim: 3.7641295510031902
506 	NSE sim: -0.5629910267416731 	MSE sim: 54.96129461914738
507 	NSE sim: -9.370188250284844 	MSE sim: 0.1444432969550154
508 	NSE sim: -0.1829766508520898 	MSE sim: 3.082875016039138
509 	NSE sim: -0.670307684197593 	MSE sim: 96.115537378806
510 	NSE sim: 0.04688686018523702 	MSE sim: 0.9218940958961043
511 	NSE sim: -0.1617150358823356 	MSE sim: 0.8461947750894064
512 	NSE sim: -0.6209253878483625 	MSE sim: 379.94001588535207
513 	NSE sim: -0.6313432909333705 	MSE sim: 358.4023707485214
514 	NSE sim: -0.15989061448739972 	MSE sim: 1.100068715695116
515 

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: -inf 	MSE sim: 1.262537991057668
526 	NSE sim: -2024.3078842726213 	MSE sim: 0.7867782896373172
527 	NSE sim: -47.92056288002172 	MSE sim: 0.8021327937915254
528 	NSE sim: -0.04196637345595433 	MSE sim: 0.0703771584080325
529 	NSE sim: -4.598126421897069 	MSE sim: 0.05770361664496519
530 	NSE sim: -0.11281812233212829 	MSE sim: 5.87304720506239
531 	NSE sim: -0.13868356347673383 	MSE sim: 1.3610174548455936
532 	NSE sim: 0.011423055831519924 	MSE sim: 0.3249327551653073
533 	NSE sim: -454.45684893247716 	MSE sim: 0.8947523198609119
534 	NSE sim: -0.048363846425582446 	MSE sim: 0.6409400782284225
535 	NSE sim: 0.007771438828375388 	MSE sim: 0.3083979772399071
536 	NSE sim: -0.11478255165686169 	MSE sim: 2.986468723472538
537 	NSE sim: -0.5175415717491454 	MSE sim: 2.4507913064434805
538 	NSE sim: -0.4901191993794909 	MSE sim: 255.08497766165138
539 	NSE sim: -0.3394320556214112 	MSE sim: 0.02603185955565704
540 	NSE sim: -0.5651180105398534 	MSE sim: 8161.186665302678
541 

In [16]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(name, 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

print_nse_mse('Train sim', nse_sim_dict, mse_sim_dict, train_subbasins)
print_nse_mse('Val sim', nse_sim_dict, mse_sim_dict, val_subbasins)
nse_median_sim_test = print_nse_mse('Test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_stations_train_val = print_nse_mse('Stations (Train/Val)', nse_dict, mse_dict, list(s for s in station_subbasins if s not in test_subbasins))
nse_median_stations_test = print_nse_mse('Stations (Test)', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))
nse_median_stations = print_nse_mse('Stations (Train/Val/Test)', nse_dict, mse_dict, station_subbasins)

writer.add_scalar('nse_median_sim', nse_median_sim_test)
writer.add_scalar('nse_median_stations_test', nse_median_stations_test)
writer.add_scalar('nse_median_stations_all', nse_median_stations)

Train sim Median NSE (clipped to 0) -0.22861165060417465 / Min -2024.3078842726213 / Max 0.1455999921050649
Train sim Median MSE (clipped to 0) 17.226557561872873 / Min 0.004644901997346725 / Max 2526273.3041749443
Val sim Median NSE (clipped to 0) -0.4630495864189452 / Min -73.94441119266952 / Max 0.0947796561655968
Val sim Median MSE (clipped to 0) 27.06281728071902 / Min 0.05770361664496519 / Max 51945.67611529914
Test sim Median NSE (clipped to 0) -0.22861165060417465 / Min -2024.3078842726213 / Max 0.1455999921050649
Test sim Median MSE (clipped to 0) 17.226557561872873 / Min 0.004644901997346725 / Max 2526273.3041749443
Stations (Train/Val) Median NSE (clipped to 0) -0.22470229727851387 / Min -1.87191679306797 / Max 0.012840362221520851
Stations (Train/Val) Median MSE (clipped to 0) 210.814258890311 / Min 9.709267657881012 / Max 100733.4786426688
Stations (Test) Median NSE (clipped to 0) -0.1329104010096207 / Min -0.6333623754638213 / Max -0.08679162417834196
Stations (Test) Medi

In [17]:
nse_dict

{676: -0.07670669169697675,
 677: -0.7159635981713459,
 678: -0.08773400709845647,
 680: -0.006528240522458706,
 681: -0.06413883583583857,
 682: -0.07152608659789106,
 683: -0.11974000827732745,
 684: -0.1786753804659802,
 685: -0.43808675179202194,
 686: -0.2038607501144336,
 687: -0.956203321529258,
 688: -0.12138663138109673,
 689: -0.3239956747287329,
 690: -0.3900949391059425,
 691: -1.8181807890159614,
 692: -0.7962159913902183,
 693: -0.8622012953430309,
 694: -0.06173762068866995,
 695: 0.012840362221520851,
 696: -0.10675649820134203,
 697: -0.10536252588495643,
 698: -0.29989987187624423,
 699: -0.09649999269491483,
 700: -0.08679162417834196,
 701: -0.25320116334613485,
 702: -0.6770760704921477,
 703: -1.87191679306797,
 704: -0.20668019790792114,
 705: -0.06793025999052427,
 706: -0.052807247726171624,
 707: -1.6014846712262316,
 709: -0.5538323753467684,
 710: -0.2899559765217674,
 712: -0.09971768911514256,
 713: -0.4227741893182111,
 714: -0.15893602575572863,
 715: -0

In [18]:
writer.close()

In [19]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   test_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin', 'is_val_subbasin']]
load_data.pickle_results('ConvLSTM_simulationTraining', save_df, time_stamp)

'ConvLSTM_simulationTraining_20190818-143934.pkl'

In [20]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[1, 3, 4, 5, 6, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 42, 43, 45, 46, 47, 48, 52, 54, 55, 57, 58, 59, 62, 63, 64, 66, 67, 70, 73, 74, 75, 78, 79, 81, 82, 84, 85, 87, 88, 89, 91, 92, 94, 95, 96, 98, 99, 100, 101, 102, 103, 105, 108, 109, 110, 111, 112, 113, 115, 116, 117, 118, 120, 121, 122, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 144, 145, 147, 148, 149, 150, 152, 153, 154, 155, 156, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 174, 175, 177, 178, 179, 180, 181, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 208, 209, 211, 212, 213, 214, 217, 218, 219, 220, 221, 223, 224, 225, 226, 227, 228, 229, 231, 233, 234, 235, 237, 238, 239, 241, 242, 243, 246, 249, 250, 251, 254, 255, 257, 260, 261, 262, 263, 265, 266, 270, 271, 273, 274, 276, 277, 278, 279, 280, 281, 282, 283, 285, 286, 288, 289, 290, 291, 292, 293, 295, 296, 297, 298, 

In [21]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[683, 688, 697, 698, 700, 709, 716, 717, 718, 720]

In [22]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190818-230853'