ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets
import torch.autograd as autograd
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190812-221842'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)

CUDA Available
2019-08-12 22:18:42,957 - 20190812-221842 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
landcover_nc = nc.Dataset('../data/NA_NALCMS_LC_30m_LAEA_mmu12_urb05_n40-45w75-90_erie.nc', 'r')
landcover_nc.set_auto_mask(False)
erie_lats = landcover_nc['lat'][:][::-1]
erie_lons = landcover_nc['lon'][:]
landcover_nc.close()
erie_lat_min, erie_lat_max, erie_lon_min, erie_lon_max = erie_lats.min(), erie_lats.max(), erie_lons.min(), erie_lons.max()
del erie_lats, erie_lons

out_lats, out_lons = load_data.load_dem_lats_lons()
out_lats = out_lats[(erie_lat_min <= out_lats) & (out_lats <= erie_lat_max)].copy()
out_lons = out_lons[(erie_lon_min <= out_lons) &  (out_lons <= erie_lon_max)].copy()

In [5]:
seq_len = 8
seq_steps = 1
stateful_lstm = False
validation_fraction, val_start, val_end = None, None, None

if stateful_lstm:
    val_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    val_end = '2010-09-30'
    train_start = '2010-10-01'
    train_end = '2012-12-31'
else:
    validation_fraction = 0.1
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [6]:
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg,  include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
landcover_types = []
geophysical_dataset = datasets.GeophysicalGridDataset(dem=True, landcover=False, soil=False, groundwater=False, min_lat=erie_lat_min, max_lat=erie_lat_max, min_lon=erie_lon_min, max_lon=erie_lon_max, landcover_types=landcover_types)
geophysical_data = next(geophysical_dataset.__iter__())

In [8]:
subbasins = train_dataset.simulated_streamflow['subbasin'].unique()
np.random.seed(0)
test_subbasins = np.random.choice(subbasins, size=int(0.2 * len(subbasins)), replace=False)
val_subbasins = np.random.choice(list(s for s in subbasins if s not in test_subbasins), size=int(validation_fraction * len(subbasins)), replace=False)
train_subbasins = list(s for s in subbasins if s not in test_subbasins and s not in val_subbasins)
station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

train_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in train_subbasins)
val_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in val_subbasins)
test_subbasin_indices = list(test_dataset.outlet_to_row_col[s] for s in test_subbasins)

train_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
val_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
for row in range(train_mask.shape[0]):
    for col in range(train_mask.shape[1]):
        train_mask[row, col] = True if (row, col) in train_subbasin_indices else False
        val_mask[row, col] = True if (row, col) in val_subbasin_indices else False
train_mask = train_mask.reshape(-1).to(device)
val_mask = val_mask.reshape(-1).to(device)

In [9]:
# Train model
num_epochs = 300
learning_rate = 2e-3
patience = 100
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
batch_size = 8
num_convlstm_layers = 2
num_conv_layers = 6
convlstm_hidden_dims = [8] * num_convlstm_layers
conv_hidden_dims = [8] * (num_conv_layers - 1)
convlstm_kernel_size = [(5,5)] * num_convlstm_layers
conv_kernel_size = [(5,5)] * num_conv_layers
conv_activation = nn.LeakyReLU
dropout = 0.1
weight_decay = 1e-6

model = conv_lstm.ConvLSTMGridWithGeophysicalInput((train_dataset.conv_height, train_dataset.conv_width), train_dataset.n_conv_vars, 
                                                   geophysical_dataset.shape[0], convlstm_hidden_dims, conv_hidden_dims, convlstm_kernel_size, 
                                                   conv_kernel_size, num_convlstm_layers, num_conv_layers, conv_activation, dropout=dropout, 
                                                   geophysical_size=geophysical_dataset.shape[1:]).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='ConvLSTM_simulationTraining')
param_description = {'time_stamp': time_stamp, 'H_convlstm': convlstm_hidden_dims, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_convlstm_layers': num_convlstm_layers, 'num_conv_layers': num_conv_layers, 'convlstm_kernel_size': convlstm_kernel_size, 'conv_kernel_size': conv_kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout, 'geophys_shape': geophysical_dataset.shape, 'conv_activation': conv_activation,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction, 'landcover_types': landcover_types,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end,
                     'train len': len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset)}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190812-221842', 'H_convlstm': [8, 8], 'H_conv': [8, 8, 8, 8, 8], 'batch_size': 8, 'num_convlstm_layers': 2, 'num_conv_layers': 6, 'convlstm_kernel_size': [(5, 5), (5, 5)], 'conv_kernel_size': [(5, 5), (5, 5), (5, 5), (5, 5), (5, 5), (5, 5)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-06\n), 'lr': 0.002, 'patience': 100, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.1, 'geophys_shape': torch.Size([1, 460, 848]), 'conv_activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'num_epochs': 300, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-06, 'validation_fraction': 0.1, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 15, 'model': 'ConvLSTMGridWithGeophysicalInput((conv_lstm):ConvLSTM((cell_list):ModuleList((0):Con

In [10]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
    
geophysical_batch = geophysical_data.repeat(batch_size,1,1,1).to(device, non_blocking=True)

In [11]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    val_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        y_train = train_batch['y_sim'].reshape((train_batch['y_sim'].shape[0],-1)).to(device, non_blocking=True)
        geophysical_input = geophysical_batch[:y_train.shape[0]]
        
        if not train_mask.any():
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        y_pred, conv_hidden_states = model(train_batch['x_conv'].to(device), geophysical_input, hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((train_batch['y_sim'].shape[0], -1))
        train_loss = loss_fn(y_pred[:,train_mask], y_train[:,train_mask])
        val_losses += loss_fn(y_pred[:,val_mask], y_train[:,val_mask]).detach()

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    val_loss = (val_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_simulationTraining', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break

print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('ConvLSTM_simulationTraining', best_loss_model[0], model, optimizer, time_stamp)

Epoch 0 mean train loss:	3.4285502433776855
Epoch 0 mean val loss:	2.878911018371582
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190812-221842.pkl
Epoch 1 mean train loss:	2.0043373107910156
Epoch 1 mean val loss:	1.9888877868652344
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190812-221842.pkl
Epoch 2 mean train loss:	1.9184786081314087
Epoch 2 mean val loss:	1.9327749013900757
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190812-221842.pkl
Epoch 3 mean train loss:	1.9819895029067993
Epoch 3 mean val loss:	1.9865829944610596
Epoch 4 mean train loss:	2.7140097618103027
Epoch 4 mean val loss:	2.486116647720337
Epoch 5 mean train loss:	1.9195704460144043
Epoch 5 mean val loss:	1.920892357826233
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190812-221842.pkl
Epoch 6 mean train loss:	2.1274378299713135
Epoch 6 mean val loss:	2.211799144744873
Epoch 7 mean train loss:	1.9645459

In [12]:
logger.warning('predicting')
model.eval()

predictions = []
conv_hidden_states = None
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    geophysical_input = geophysical_batch[:test_batch['y_sim'].shape[0]]
    pred, conv_hidden_states = model(test_batch['x_conv'].to(device), geophysical_input, hidden_state=conv_hidden_states)
    predictions.append(pred.detach())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-08-12 23:51:08,913 - 20190812-221842 - predicting


In [13]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
predictions_df['is_val_subbasin'] = False
for subbasin in test_dataset.simulated_streamflow['subbasin'].unique():
    row, col = test_dataset.outlet_to_row_col[subbasin]
    
    station = None
    subbasin_sim = test_dataset.simulated_streamflow[test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    pred['is_val_subbasin'] = subbasin in val_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1758420322810381 	MSE sim: 2517292.422630728
2 	NSE sim: -0.8098592780320846 	MSE sim: 153.61630748630006
3 	NSE sim: -1.0874365081863688 	MSE sim: 66.42631655003973
4 	NSE sim: 0.0067710204676982855 	MSE sim: 16.92414038879068
5 	NSE sim: -0.19896671084031725 	MSE sim: 29.08028267183216
6 	NSE sim: 0.16739312691163688 	MSE sim: 4.831487606989649
7 	NSE sim: 0.4314703707304529 	MSE sim: 1.5763167286804254
8 	NSE sim: 0.2669391613666918 	MSE sim: 5.580419975554415
9 	NSE sim: 0.025730218448415165 	MSE sim: 3.954679800386977
10 	NSE sim: -0.0606333610294274 	MSE sim: 6.045435416461621
11 	NSE sim: -0.5277077020649581 	MSE sim: 18.64283421480465
12 	NSE sim: -0.10086072713234939 	MSE sim: 3.2396077489647026
13 	NSE sim: 0.19535175852278497 	MSE sim: 3.088087671281976
14 	NSE sim: -0.9092459839446854 	MSE sim: 512.2640233998211
15 	NSE sim: -0.7801485979163061 	MSE sim: 214.55530676728256
16 	NSE sim: -0.5118529699346779 	MSE sim: 22.119933313718203
17 	NSE sim: -0.503982500

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


500 	NSE sim: -0.05016268510952826 	MSE sim: 12.320825933775861
501 	NSE sim: 0.15664332776389678 	MSE sim: 0.03358956278700657
502 	NSE sim: 0.347635341697221 	MSE sim: 0.06801067750177449
503 	NSE sim: 0.2706087347921705 	MSE sim: 0.01968410638817396
504 	NSE sim: 0.2597123404672109 	MSE sim: 0.004982253767674713
505 	NSE sim: -0.13773560094019976 	MSE sim: 3.235340639421663
506 	NSE sim: -0.4508253423211095 	MSE sim: 51.0170805308244
507 	NSE sim: -77.65091166403897 	MSE sim: 1.0955053770561416
508 	NSE sim: -0.28742549108380655 	MSE sim: 3.355072036811008
509 	NSE sim: -0.6379622777916187 	MSE sim: 94.25426586107386
510 	NSE sim: 0.18709913606331685 	MSE sim: 0.7862744470795122
511 	NSE sim: 0.3149803463893788 	MSE sim: 0.49896922551114653
512 	NSE sim: -0.5702361509655882 	MSE sim: 368.05861183626
513 	NSE sim: -0.6377975909938411 	MSE sim: 359.8203656341175
514 	NSE sim: -0.10143923786322051 	MSE sim: 1.0446319960506611
515 	NSE sim: 0.35958490788256525 	MSE sim: 0.12569392763465

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


526 	NSE sim: -0.0018355096995938958 	MSE sim: 0.00038918647132135285
527 	NSE sim: -24.412089223358254 	MSE sim: 0.4166728452165107
528 	NSE sim: 0.1793447280180992 	MSE sim: 0.05542922261790115
529 	NSE sim: -53.83458627854312 	MSE sim: 0.5652165933812667
530 	NSE sim: -0.30677382723423485 	MSE sim: 6.896674505626109
531 	NSE sim: 0.057990873190405856 	MSE sim: 1.1259413109442937
532 	NSE sim: 0.2994386265604524 	MSE sim: 0.23026567489450647
533 	NSE sim: -209.81072387753028 	MSE sim: 0.41414106447862925
534 	NSE sim: 0.36422902837943827 	MSE sim: 0.3886924350502853
535 	NSE sim: 0.07479272074361909 	MSE sim: 0.2875668617252715
536 	NSE sim: -0.0004292135186247936 	MSE sim: 2.6801195908395004
537 	NSE sim: 0.3286555701587398 	MSE sim: 1.084204296550364
538 	NSE sim: -0.5784362017341023 	MSE sim: 270.2034598489497
539 	NSE sim: 0.10118388819466273 	MSE sim: 0.017468489492005745
540 	NSE sim: -0.5634563421283934 	MSE sim: 8152.522024048512
541 	NSE sim: -0.3806300111125678 	MSE sim: 74

In [14]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(name, 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

print_nse_mse('Train sim', nse_sim_dict, mse_sim_dict, train_subbasins)
print_nse_mse('Val sim', nse_sim_dict, mse_sim_dict, val_subbasins)
nse_median_sim_test = print_nse_mse('Test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_stations_train_val = print_nse_mse('Stations (Train/Val)', nse_dict, mse_dict, list(s for s in station_subbasins if s not in test_subbasins))
nse_median_stations_test = print_nse_mse('Stations (Test)', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))
nse_median_stations = print_nse_mse('Stations (Train/Val/Test)', nse_dict, mse_dict, station_subbasins)

writer.add_scalar('nse_median_sim', nse_median_sim_test)
writer.add_scalar('nse_median_stations_test', nse_median_stations_test)
writer.add_scalar('nse_median_stations_all', nse_median_stations)

Train sim Median NSE (clipped to 0) -0.08637182911132413 / Min -1.4452350010918389 / Max 0.4560928013765264
Train sim Median MSE (clipped to 0) 13.944317780557856 / Min 0.00038918647132135285 / Max 2527609.4811835275
Val sim Median NSE (clipped to 0) -0.365051166358356 / Min -53.83458627854312 / Max 0.4426879216411589
Val sim Median MSE (clipped to 0) 23.93476464517748 / Min 0.08904223383717245 / Max 51631.5273266068
Test sim Median NSE (clipped to 0) -0.08637182911132413 / Min -1.4452350010918389 / Max 0.4560928013765264
Test sim Median MSE (clipped to 0) 13.944317780557856 / Min 0.00038918647132135285 / Max 2527609.4811835275
Stations (Train/Val) Median NSE (clipped to 0) -0.24605504407601253 / Min -2.2423121841458453 / Max 0.12160278563123117
Stations (Train/Val) Median MSE (clipped to 0) 210.18822335807255 / Min 7.441985179967465 / Max 100472.16887685002
Stations (Test) Median NSE (clipped to 0) -0.09739356007708666 / Min -0.5492405127334175 / Max 0.028167809947038713
Stations (Tes

In [15]:
nse_dict

{676: -0.10286087367093599,
 677: -1.437247298157772,
 678: -0.09612375087230407,
 680: 0.11450963506853773,
 681: 0.057422395508795354,
 682: -0.0007963124109635622,
 683: -0.11392108349427565,
 684: -0.27092295075672546,
 685: -0.3895264152804261,
 686: 0.12160278563123117,
 687: -0.9394116691858831,
 688: 0.028167809947038713,
 689: -0.014819710081269255,
 690: -0.372796961669019,
 691: -1.3453829891949627,
 692: -0.7978632935047876,
 693: -0.7685835225547637,
 694: -0.12820645328317437,
 695: -0.003970586881791194,
 696: -0.348784884343627,
 697: -0.05902672968666112,
 698: -0.2978905097050395,
 699: -0.026616032829433367,
 700: -0.006369442991404295,
 701: -0.15412769313038677,
 702: -0.38958290402186924,
 703: -1.4189426213218983,
 704: -0.36847025950028933,
 705: -0.11171673722806652,
 706: -0.090268287530334,
 707: -2.2423121841458453,
 709: -0.5492405127334175,
 710: -0.27898804054555093,
 712: -0.002661662605644377,
 713: -0.4190834124758609,
 714: -0.14777536891110765,
 715:

In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   train_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin', 'is_val_subbasin']]
load_data.pickle_results('ConvLSTM_simulationTraining', save_df, time_stamp)

'ConvLSTM_simulationTraining_20190812-221842.pkl'

In [18]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[1, 3, 4, 5, 6, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 42, 43, 45, 46, 47, 48, 52, 54, 55, 57, 58, 59, 62, 63, 64, 66, 67, 70, 73, 74, 75, 78, 79, 81, 82, 84, 85, 87, 88, 89, 91, 92, 94, 95, 96, 98, 99, 100, 101, 102, 103, 105, 108, 109, 110, 111, 112, 113, 115, 116, 117, 118, 120, 121, 122, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 144, 145, 147, 148, 149, 150, 152, 153, 154, 155, 156, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 174, 175, 177, 178, 179, 180, 181, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 208, 209, 211, 212, 213, 214, 217, 218, 219, 220, 221, 223, 224, 225, 226, 227, 228, 229, 231, 233, 234, 235, 237, 238, 239, 241, 242, 243, 246, 249, 250, 251, 254, 255, 257, 260, 261, 262, 263, 265, 266, 270, 271, 273, 274, 276, 277, 278, 279, 280, 281, 282, 283, 285, 286, 288, 289, 290, 291, 292, 293, 295, 296, 297, 298, 

In [19]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190812-235308'