ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils
import torch.autograd as autograd
from torchvision import transforms
import torchvision.transforms.functional as TF
import random
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190817-100517'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-17 10:05:17,820 - 20190817-100517 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
landcover_nc = nc.Dataset('../data/NA_NALCMS_LC_30m_LAEA_mmu12_urb05_n40-45w75-90_erie.nc', 'r')
landcover_nc.set_auto_mask(False)
erie_lats = landcover_nc['lat'][:][::-1]
erie_lons = landcover_nc['lon'][:]
landcover_nc.close()
erie_lat_min, erie_lat_max, erie_lon_min, erie_lon_max = erie_lats.min(), erie_lats.max(), erie_lons.min(), erie_lons.max()
del erie_lats, erie_lons

out_lats, out_lons = load_data.load_dem_lats_lons()
out_lats = out_lats[(erie_lat_min <= out_lats) & (out_lats <= erie_lat_max)][::3].copy()
out_lons = out_lons[(erie_lon_min <= out_lons) &  (out_lons <= erie_lon_max)][::3].copy()

In [5]:
seq_len = 8
seq_steps = 1
stateful_lstm = False
val_start, val_end = None, None
validation_fraction = 0.1  # fraction of subbasins
p_random_transform = 0.5

if stateful_lstm:
    val_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    val_end = '2010-09-30'
    train_start = '2010-10-01'
    train_end = '2012-12-31'
else:
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [6]:
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg,  include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
landcover_types = []
geophysical_dataset = datasets.GeophysicalGridDataset(dem=True, landcover=False, soil=False, groundwater=False, min_lat=erie_lat_min, max_lat=erie_lat_max, min_lon=erie_lon_min, max_lon=erie_lon_max, landcover_types=landcover_types)
geophysical_data = next(geophysical_dataset.__iter__())[:,::3,::3].clone().detach()
geophysical_dataset.shape = geophysical_data.shape

In [8]:
subbasins = train_dataset.simulated_streamflow['subbasin'].unique()
np.random.seed(0)
test_subbasins = np.random.choice(subbasins, size=int(0.2 * len(subbasins)), replace=False)
val_subbasins = np.random.choice(list(s for s in subbasins if s not in test_subbasins), size=int(validation_fraction * len(subbasins)), replace=False)
train_subbasins = list(s for s in subbasins if s not in test_subbasins and s not in val_subbasins)
station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

train_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in train_subbasins)
val_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in val_subbasins)
test_subbasin_indices = list(test_dataset.outlet_to_row_col[s] for s in test_subbasins)

train_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
val_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
for row in range(train_mask.shape[0]):
    for col in range(train_mask.shape[1]):
        train_mask[row, col] = True if (row, col) in train_subbasin_indices else False
        val_mask[row, col] = True if (row, col) in val_subbasin_indices else False
train_mask = train_mask
val_mask = val_mask

In [9]:
# Train model
num_epochs = 200
learning_rate = 2e-3
patience = 100
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
batch_size = 1
num_convlstm_layers = 2
num_conv_layers = 2
convlstm_hidden_dims = [8,8]
conv_hidden_dims = [32]
convlstm_kernel_size = [(5,5)] * num_convlstm_layers
conv_kernel_size = [(5,5)] * num_conv_layers
conv_activation = nn.LeakyReLU
dropout = 0.3
weight_decay = 1e-5
feed_timesteps = 8

model = conv_lstm.ConvLSTMGridWithGeophysicalInput((geophysical_dataset.shape[1], geophysical_dataset.shape[2]), train_dataset.n_conv_vars + geophysical_dataset.shape[0], 
                                                   0, convlstm_hidden_dims, conv_hidden_dims, convlstm_kernel_size, 
                                                   conv_kernel_size, num_convlstm_layers, num_conv_layers, conv_activation, dropout=dropout, 
                                                   geophysical_size=None, feed_timesteps=feed_timesteps).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='ConvLSTM_simulationTraining')
param_description = {'time_stamp': time_stamp, 'H_convlstm': convlstm_hidden_dims, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_convlstm_layers': num_convlstm_layers, 'num_conv_layers': num_conv_layers, 'convlstm_kernel_size': convlstm_kernel_size, 'conv_kernel_size': conv_kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout, 'geophys_shape': geophysical_dataset.shape, 'conv_activation': conv_activation,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction, 'landcover_types': landcover_types,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end, 'feed_timesteps': feed_timesteps,
                     'train len': len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset), 'p_random_transform': p_random_transform}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190817-100517', 'H_convlstm': [8, 8], 'H_conv': [32], 'batch_size': 1, 'num_convlstm_layers': 2, 'num_conv_layers': 2, 'convlstm_kernel_size': [(5, 5), (5, 5)], 'conv_kernel_size': [(5, 5), (5, 5)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 100, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.3, 'geophys_shape': torch.Size([1, 154, 283]), 'conv_activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'num_epochs': 200, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'validation_fraction': 0.1, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 15, 'model': 'ConvLSTMGridWithGeophysicalInput((conv_lstm):ConvLSTM((cell_list):ModuleList((0):ConvLSTMCell((conv):Conv2d(24,32,kernel_size=(

In [10]:
stride = geophysical_dataset.shape[1] // train_dataset.conv_height, geophysical_dataset.shape[2] // train_dataset.conv_width
upsample_kernel = (geophysical_dataset.shape[1] + stride[0] * (1 - train_dataset.conv_height), 
                   geophysical_dataset.shape[2] + stride[1] * (1 - train_dataset.conv_width))
upsample_model = nn.ConvTranspose2d(train_dataset.n_conv_vars, train_dataset.n_conv_vars, upsample_kernel, stride=stride, bias=False)
upsample_model.weight = nn.Parameter(torch.ones(upsample_model.weight.shape))
upsample_model.eval()  # Model is just for upsampling, not for learning

ConvTranspose2d(15, 15, kernel_size=(22, 17), stride=(4, 7), bias=False)

In [11]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
    
geophysical_batch = geophysical_data.repeat(batch_size,seq_len,1,1,1)

In [12]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    val_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        geophysical_input = geophysical_batch[:train_batch['y_sim'].shape[0]]
        
        # Upsample x_conv to geophysical input size
        with torch.no_grad():
            x_conv = upsample_model(train_batch['x_conv'].reshape(-1,*train_batch['x_conv'].shape[2:]))
        x_conv = x_conv.reshape(*train_batch['x_conv'].shape[0:3], *geophysical_input.shape[-2:])
        x_conv = torch.cat([geophysical_input, x_conv], dim=2)
        x_conv, geophysical_input, y_train, y_sim_means, train_mask_transformed, val_mask_transformed = \
            utils.random_transform(x_conv, geophysical_input, train_batch['y_sim'], train_dataset.y_sim_means, 
                                   train_mask, val_mask, rdrs_contains_month=include_month, border_masking=20, p=p_random_transform)
        y_train = y_train.reshape((y_train.shape[0],-1)).to(device, non_blocking=True)
        y_sim_means = y_sim_means.reshape(-1).to(device, non_blocking=True)
        train_mask_transformed = train_mask_transformed.reshape(-1).to(device, non_blocking=True)
        val_mask_transformed = val_mask_transformed.reshape(-1).to(device, non_blocking=True)
        
        if not train_mask_transformed.any():
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        dummy_geophysical_input = torch.zeros((x_conv.shape[0],0,*geophysical_input.shape[-2:]), device=device)
        y_pred, conv_hidden_states = model(x_conv.to(device), dummy_geophysical_input.to(device), hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((y_train.shape[0], -1))
        train_loss = loss_fn(y_pred[:,train_mask_transformed], y_train[:,train_mask_transformed], 
                             means=y_sim_means[train_mask_transformed])
        val_losses += loss_fn(y_pred[:,val_mask_transformed], y_train[:,val_mask_transformed], 
                              means=y_sim_means[val_mask_transformed]).detach()

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    val_loss = (val_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_simulationTraining', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('ConvLSTM_simulationTraining', best_loss_model[0], model, optimizer, time_stamp)

Epoch 0 mean train loss:	629039.0625
Epoch 0 mean val loss:	564844.1875
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190817-100517.pkl
Epoch 1 mean train loss:	207889.5
Epoch 1 mean val loss:	1047509.625
Epoch 2 mean train loss:	662467.0625
Epoch 2 mean val loss:	588811.875
Epoch 3 mean train loss:	1098939.375
Epoch 3 mean val loss:	1243107.125
Epoch 4 mean train loss:	3548858.0
Epoch 4 mean val loss:	1500112.875
Epoch 5 mean train loss:	3953959.5
Epoch 5 mean val loss:	2344697.5
Epoch 6 mean train loss:	1479637.375
Epoch 6 mean val loss:	2634081.5
Epoch 7 mean train loss:	3072811.0
Epoch 7 mean val loss:	871058.5
Epoch 8 mean train loss:	1152819.125
Epoch 8 mean val loss:	2406628.75
Epoch 9 mean train loss:	688730.1875
Epoch 9 mean val loss:	191032.171875
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/ConvLSTM_simulationTraining_allStations_20190817-100517.pkl
Epoch 10 mean train loss:	799101.5
Epoch 10 mean val loss:

In [13]:
del train_mask_transformed, val_mask_transformed, y_train, y_pred, y_sim_means
if USE_CUDA:
    torch.cuda.empty_cache()

In [14]:
logger.warning('predicting')
model.eval()

predictions = []
conv_hidden_states = None
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    geophysical_input = geophysical_batch[:test_batch['y_sim'].shape[0]]
    with torch.no_grad():
        x_conv = upsample_model(test_batch['x_conv'].reshape(-1,*test_batch['x_conv'].shape[2:]))
    x_conv = x_conv.reshape(*test_batch['x_conv'].shape[0:3], *geophysical_input.shape[-2:])
    x_conv = torch.cat([geophysical_input, x_conv], dim=2)
    dummy_geophysical_input = torch.zeros((x_conv.shape[0],0,*geophysical_input.shape[-2:]), device=device)
    pred, conv_hidden_states = model(x_conv.to(device), dummy_geophysical_input.to(device), hidden_state=conv_hidden_states)
    predictions.append(pred.detach().cpu())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-08-17 21:48:07,358 - 20190817-100517 - predicting


In [15]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
predictions_df['is_val_subbasin'] = False
for subbasin in test_dataset.simulated_streamflow['subbasin'].unique():
    row, col = test_dataset.outlet_to_row_col[subbasin]
    
    station = None
    subbasin_sim = test_dataset.simulated_streamflow[test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    pred['is_val_subbasin'] = subbasin in val_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else ('val' if subbasin in val_subbasins else 'train')
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1766282893212203 	MSE sim: 2518202.065362198
2 	NSE sim: -0.8912638770666239 	MSE sim: 160.5257252890399
3 	NSE sim: -0.819134042604944 	MSE sim: 57.88840680285779
4 	NSE sim: -0.706144868741688 	MSE sim: 29.071881587462382
5 	NSE sim: -0.6897496075762641 	MSE sim: 40.983953756727495
6 	NSE sim: -0.42109213552479696 	MSE sim: 8.246375646300766
7 	NSE sim: -0.23212606131414093 	MSE sim: 3.4162175941260786
8 	NSE sim: -0.41556995979952926 	MSE sim: 10.776015392102442
9 	NSE sim: -0.41560700248616533 	MSE sim: 5.746121376260691
10 	NSE sim: -0.43489998256116214 	MSE sim: 8.178693498039737
11 	NSE sim: -0.5225907440560269 	MSE sim: 18.580391248970493
12 	NSE sim: -0.2753899964659705 	MSE sim: 3.7532116585411512
13 	NSE sim: -0.22397555346688325 	MSE sim: 4.697386537094228
14 	NSE sim: -0.9386887588300563 	MSE sim: 520.1637254024271
15 	NSE sim: -0.8839410794704601 	MSE sim: 227.06506451787456
16 	NSE sim: -0.6750130128092242 	MSE sim: 24.507129251167267
17 	NSE sim: -0.85154

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 0.9355915181915128
500 	NSE sim: -0.6240529081655151 	MSE sim: 19.053879434559082
501 	NSE sim: -22.829448051014456 	MSE sim: 0.9490892380883165
502 	NSE sim: -7.191813553817761 	MSE sim: 0.8540174313133719
503 	NSE sim: -29.78723059003099 	MSE sim: 0.8308560182150486
504 	NSE sim: -160.34877155682645 	MSE sim: 1.0859029117222225
505 	NSE sim: -0.3924938190841023 	MSE sim: 3.9597880555933043
506 	NSE sim: -0.5587725839078688 	MSE sim: 54.81295654461212
507 	NSE sim: -74.63357298728478 	MSE sim: 1.0534777555721935
508 	NSE sim: -0.31716772670591453 	MSE sim: 3.432581254811635
509 	NSE sim: -0.5773921018002035 	MSE sim: 90.7688391521975
510 	NSE sim: -0.40547320590032565 	MSE sim: 1.3594371920121655
511 	NSE sim: -0.8227315379058604 	MSE sim: 1.3276800731042946
512 	NSE sim: -0.6732388750248974 	MSE sim: 392.2021392982344
513 	NSE sim: -0.6582171070556335 	MSE sim: 364.3064863707876
514 	NSE sim: -0.6188599528947689 	MSE sim: 1.5353664966573022
515 	NSE sim: 

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: -inf 	MSE sim: 1.1457175507711619
526 	NSE sim: -2868.4670146395624 	MSE sim: 1.1147116779035455
527 	NSE sim: -65.58610990450785 	MSE sim: 1.0917883855180355
528 	NSE sim: -13.520256288385546 	MSE sim: 0.9807364258249134
529 	NSE sim: -105.76456066958966 	MSE sim: 1.1004934179493606
530 	NSE sim: -0.4011344214723276 	MSE sim: 7.39467522392557
531 	NSE sim: -0.3940902440418004 	MSE sim: 1.666293618902853
532 	NSE sim: -1.8229216936348895 	MSE sim: 0.9278587053233708
533 	NSE sim: -538.6792410214478 	MSE sim: 1.0602085664460055
534 	NSE sim: -1.1455476626942902 	MSE sim: 1.311727308661729
535 	NSE sim: -1.3104041887420754 	MSE sim: 0.7181046850468761
536 	NSE sim: -0.3046513983814263 	MSE sim: 3.4951216185702814
537 	NSE sim: -0.29716520828204085 	MSE sim: 2.0948890459813283
538 	NSE sim: -0.5699022738788269 	MSE sim: 268.7425855798069
539 	NSE sim: -43.13532511098349 	MSE sim: 0.8577699629559469
540 	NSE sim: -0.5735048754599967 	MSE sim: 8204.9193229611
541 	NSE sim: -0.

In [16]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(name, 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

print_nse_mse('Train sim', nse_sim_dict, mse_sim_dict, train_subbasins)
print_nse_mse('Val sim', nse_sim_dict, mse_sim_dict, val_subbasins)
nse_median_sim_test = print_nse_mse('Test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_stations_train_val = print_nse_mse('Stations (Train/Val)', nse_dict, mse_dict, list(s for s in station_subbasins if s not in test_subbasins))
nse_median_stations_test = print_nse_mse('Stations (Test)', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))
nse_median_stations = print_nse_mse('Stations (Train/Val/Test)', nse_dict, mse_dict, station_subbasins)

writer.add_scalar('nse_median_sim', nse_median_sim_test)
writer.add_scalar('nse_median_stations_test', nse_median_stations_test)
writer.add_scalar('nse_median_stations_all', nse_median_stations)

Train sim Median NSE (clipped to 0) -0.45219745099793685 / Min -2868.4670146395624 / Max -0.12031747631259604
Train sim Median MSE (clipped to 0) 18.51750263628384 / Min 0.192390512347688 / Max 2526821.042068822
Val sim Median NSE (clipped to 0) -0.527545518709954 / Min -111.7519323364817 / Max -0.12545260079856546
Val sim Median MSE (clipped to 0) 29.18346797840735 / Min 0.6495654342164096 / Max 51939.210050934795
Test sim Median NSE (clipped to 0) -0.45219745099793685 / Min -2868.4670146395624 / Max -0.12031747631259604
Test sim Median MSE (clipped to 0) 18.51750263628384 / Min 0.192390512347688 / Max 2526821.042068822
Stations (Train/Val) Median NSE (clipped to 0) -0.3498130929948703 / Min -2.3170656565859455 / Max -0.07090186806474885
Stations (Train/Val) Median MSE (clipped to 0) 238.4242157277602 / Min 10.099173715215558 / Max 100595.58516889866
Stations (Test) Median NSE (clipped to 0) -0.17398027314751474 / Min -0.5589004267908662 / Max -0.07672898182588006
Stations (Test) Medi

In [17]:
nse_dict

{676: -0.16297335587121364,
 677: -1.2987543906950236,
 678: -0.14649692789546953,
 680: -0.10635459490390553,
 681: -0.14297799939724465,
 682: -0.1022717957677386,
 683: -0.24523103417009984,
 684: -0.26069004513544924,
 685: -0.4666178723505483,
 686: -0.32246131749154316,
 687: -0.9740589681851477,
 688: -0.18045731404688836,
 689: -0.3771648684981974,
 690: -0.4712009252154601,
 691: -1.8748096283024442,
 692: -0.8554927359102618,
 693: -0.8871994999616777,
 694: -0.1781976109560124,
 695: -0.07090186806474885,
 696: -0.4010367335918319,
 697: -0.07672898182588006,
 698: -0.30699330039809913,
 699: -0.11874661047820023,
 700: -0.1261868988503292,
 701: -0.3173027852251762,
 702: -0.648048499769883,
 703: -2.0111962528793126,
 704: -0.43703276436628613,
 705: -0.255274632700091,
 706: -0.2268062285385728,
 707: -2.3170656565859455,
 709: -0.5589004267908662,
 710: -0.4225598901243304,
 712: -0.18311306393626037,
 713: -0.42082656199510926,
 714: -0.17323269724469892,
 715: -0.23859

In [18]:
writer.close()

In [19]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   test_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin', 'is_val_subbasin']]
load_data.pickle_results('ConvLSTM_simulationTraining', save_df, time_stamp)

'ConvLSTM_simulationTraining_20190817-100517.pkl'

In [20]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[1, 3, 4, 5, 6, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 42, 43, 45, 46, 47, 48, 52, 54, 55, 57, 58, 59, 62, 63, 64, 66, 67, 70, 73, 74, 75, 78, 79, 81, 82, 84, 85, 87, 88, 89, 91, 92, 94, 95, 96, 98, 99, 100, 101, 102, 103, 105, 108, 109, 110, 111, 112, 113, 115, 116, 117, 118, 120, 121, 122, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 144, 145, 147, 148, 149, 150, 152, 153, 154, 155, 156, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 174, 175, 177, 178, 179, 180, 181, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 208, 209, 211, 212, 213, 214, 217, 218, 219, 220, 221, 223, 224, 225, 226, 227, 228, 229, 231, 233, 234, 235, 237, 238, 239, 241, 242, 243, 246, 249, 250, 251, 254, 255, 257, 260, 261, 262, 263, 265, 266, 270, 271, 273, 274, 276, 277, 278, 279, 280, 281, 282, 283, 285, 286, 288, 289, 290, 291, 292, 293, 295, 296, 297, 298, 

In [21]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[683, 688, 697, 698, 700, 709, 716, 717, 718, 720]

In [22]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190817-215142'