ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190822-154720'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-22 15:47:20,395 - 20190822-154720 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 10 if partitioning_strategy == 'distance' else None
seq_len = 8
seq_steps = 1
train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
rdrs_vars = [4,5]
agg = ['sum', 'minmax']
include_month = True
dem, landcover, soil, groundwater = True, False, False, False
landcover_types = []

In [7]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.grid_dataset.simulated_streamflow[~pd.isna(train_dataset.grid_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  


Loading subbasin shapes
Aggregating into subbasins


  # This is added back by InteractiveShellApp.init_path()


Loading subbasin shapes
Aggregating into subbasins


  del sys.path[0]


Loading subbasin shapes
Aggregating into subbasins


In [8]:
# Train model
num_epochs = 500
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.2
weight_decay = 1e-5

batch_size = 4
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0]).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190822-154720', 'batch_size': 4, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['sum', 'minmax'], 'rdrs_vars': [4, 5], 'dropout': 0.2, 'spatial_validation': False, 'val_fraction': 0.1, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 48, 586]), 'x_val_shape': torch.Size([1088, 8, 48, 586]), 'partitioning_strategy': 'distance', 'max_hops': 10, 'spatial_x_test_shape': torch.Size([730, 8, 48, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 48, 586]), 'num_epochs': 500, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model((st_gcn_networks):ModuleList((0):st_gcn((gcn):ConvTemporalGraphical((conv):Con

In [9]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [10]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [11]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	6.146320343017578
Epoch 0 mean val loss:	1.468135118484497
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-154720.pkl
Epoch 1 mean train loss:	1.5138360261917114
Epoch 1 mean val loss:	1.5294221639633179
Epoch 2 mean train loss:	1.5146418809890747
Epoch 2 mean val loss:	2.6154510974884033
Epoch 3 mean train loss:	1.6060311794281006
Epoch 3 mean val loss:	1.6213895082473755
Epoch 4 mean train loss:	1.5380980968475342
Epoch 4 mean val loss:	1.6070563793182373
Epoch 5 mean train loss:	1.3924682140350342
Epoch 5 mean val loss:	2.7654836177825928
Epoch 6 mean train loss:	1.3461552858352661
Epoch 6 mean val loss:	1.364626169204712
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-154720.pkl
Epoch 7 mean train loss:	1.3572975397109985
Epoch 7 mean val loss:	1.5486794710159302
Epoch 8 mean train loss:	1.3160547018051147
Epoch 8 mean val loss:	1.37301576137542

In [12]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [13]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-22 18:37:20,180 - 20190822-154720 - predicting


In [14]:
actuals = spatial_test_dataset.grid_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.grid_dataset.simulated_streamflow[spatial_test_dataset.grid_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.165548537497524 	MSE sim: 2505383.5909984224
474 	NSE sim: -1.1215358442582 	MSE sim: 2482751.1026055203
11 	NSE sim: -2.8204570465334204 	MSE sim: 46.62158032392791
5 	NSE sim: -7.65593883566077 	MSE sim: 209.94506841201994
10 	NSE sim: -15.064522675886636 	MSE sim: 91.56513259124361
04214500 705 	NSE: 0.15347986174877204 	MSE: 102.03707159821239 (clipped to 0)
705 	NSE sim: -0.22452960344209938 	MSE sim: 29.315677609182263
8 	NSE sim: -40.23355224441482 	MSE sim: 313.89009817628516
556 	NSE sim: -19.007237075004394 	MSE sim: 4643.219860512157
2 	NSE sim: 0.02974960431861451 	MSE sim: 82.35241542301488
4 	NSE sim: 0.2718870552746684 	MSE sim: 12.406691658583993
12 	NSE sim: -123.40109542162381 	MSE sim: 366.0869561196895
04215000 688 	NSE: -0.018667449103831446 	MSE: 97.8458738144051 (clipped to 0)
688 	NSE sim: 0.29203815218150786 	MSE sim: 5.699340877503912
04215500 706 	NSE: 0.09909891135146753 	MSE: 162.68914340596558 (clipped to 0)
706 	NSE sim: 0.21503690298999023

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 0.9894395927103344
280 	NSE sim: 0.34501334121210814 	MSE sim: 1.0671131896197865
506 	NSE sim: 0.32597233742716636 	MSE sim: 23.701628678795693
583 	NSE sim: 0.41744875915403856 	MSE sim: 150.79095252315747
671 	NSE sim: 0.31817530833069574 	MSE sim: 0.2117095388188115
513 	NSE sim: 0.33323913144420503 	MSE sim: 146.48583001559078
654 	NSE sim: 0.420405328353027 	MSE sim: 115.22449739342444
511 	NSE sim: 0.31806277867415034 	MSE sim: 0.496723977740931
512 	NSE sim: 0.3731207429585579 	MSE sim: 146.93860474027235
243 	NSE sim: 0.330445967954061 	MSE sim: 58.85201408388163
502 	NSE sim: 0.30605660787567934 	MSE sim: 0.07234536642288482
509 	NSE sim: 0.3179659396522201 	MSE sim: 39.246703371581326
647 	NSE sim: 0.30166086063218767 	MSE sim: 0.024408685880411025
253 	NSE sim: 0.35435490090740407 	MSE sim: 0.8075758194053215
04174500 707 	NSE: -0.10371050597658305 	MSE: 104.38257851446473 (clipped to 0)
707 	NSE sim: 0.3923969290552445 	MSE sim: 97.197796018918

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


531 	NSE sim: 0.33858414639492873 	MSE sim: 0.7905607409661122
660 	NSE sim: 0.35006866514211543 	MSE sim: 0.6851473371119194
532 	NSE sim: 0.3466816963844591 	MSE sim: 0.21473747455468012
587 	NSE sim: 0.24557158241724786 	MSE sim: 3.3829929682559032
589 	NSE sim: 0.3352467418369305 	MSE sim: 0.030558821037721055
592 	NSE sim: 0.3264370536905502 	MSE sim: 0.3796044241039212
601 	NSE sim: 0.39457507371926437 	MSE sim: 0.19371135777095028
611 	NSE sim: 0.17362894618190494 	MSE sim: 0.1487474196034908
612 	NSE sim: 0.34700041849570595 	MSE sim: 0.2444916712624642
616 	NSE sim: 0.25155370157560963 	MSE sim: 1.5379414851100008
618 	NSE sim: 0.41571962899524306 	MSE sim: 0.5294684675433903
619 	NSE sim: 0.38243833744676514 	MSE sim: 0.7479397482305067
622 	NSE sim: 0.24826366880384665 	MSE sim: 0.4457700363789105
119 	NSE sim: 0.20275746257818383 	MSE sim: 1.572041749340771
04208504 719 	NSE: -0.1394617554027644 	MSE: 1112.7170567641745 (clipped to 0)
719 	NSE sim: 0.09123170161193495 	MSE 

In [15]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.33710950562856307 / Min -inf / Max 0.5768430159056654
                  Median MSE (clipped to 0) 8.103743071308187 / Min 0.00027874941294239876 / Max 32147.537845798048
Spatial test sim Median NSE (clipped to 0) -0.43968021707683025 / Min -8296.721857869343 / Max 0.3918342990133574
                 Median MSE (clipped to 0) 63.66736954950816 / Min 1.2015533278518664 / Max 2505383.5909984224
Stations temporal test Median NSE (clipped to 0) 0.1705857472958892 / Min -0.7080200856025896 / Max 0.3851549734240671
                       Median MSE (clipped to 0) 136.8793934907034 / Min 12.525436821800888 / Max 43531.48856517923
Stations spatial test Median NSE (clipped to 0) -0.19563027508370978 / Min -0.952156834452071 / Max 0.19532311890726406
                      Median MSE (clipped to 0) 175.76351157310705 / Min 28.55683185490028 / Max 14653.428620462104


In [16]:
nse_dict

{705: 0.15347986174877204,
 688: -0.018667449103831446,
 706: 0.09909891135146753,
 687: -0.9512213229840285,
 686: 0.08682468062069171,
 677: -0.952156834452071,
 676: -0.1476823307136499,
 685: -0.20815977279409292,
 684: -0.18310077737332664,
 704: -0.29103033940610823,
 703: -0.41796471345510633,
 710: -0.3716520740750482,
 701: 0.19532311890726406,
 702: -0.31224088659318805,
 718: 0.09436707501627306,
 717: 0.14054088410710464,
 716: 0.12080909970939158,
 714: 0.2092750390800152,
 709: 0.31824446098855674,
 715: 0.15068809560748175,
 723: 0.1727911389268667,
 724: 0.21684429548870354,
 713: 0.3851549734240671,
 712: 0.18254931921198614,
 707: -0.10371050597658305,
 719: -0.1394617554027644,
 720: 0.017705905509979303,
 721: 0.12904153381273342,
 690: -0.1667080614863432,
 695: 0.2084073521358044,
 694: 0.18782911329679386,
 697: 0.18926775705997156,
 683: 0.20304028984991185,
 689: -0.7080200856025896,
 680: 0.3794339193700047,
 692: -0.29926079999169786,
 696: 0.2075499515669649

In [17]:
writer.close()

In [18]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.grid_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190822-154720.pkl'

In [19]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[602, 112, 123, 132, 131, 718, 140, 124, 130, 617, 608, 135, 144, 136, 717, 145, 138, 137, 716, 141, 155, 538, 664, 142, 139, 614, 143, 533, 151, 603, 606, 146, 212, 205, 199, 181, 189, 160, 180, 159, 200, 188, 714, 615, 166, 147, 620, 148, 241, 238, 237, 655, 245, 518, 187, 218, 152, 225, 191, 596, 274, 668, 179, 527, 231, 709, 659, 516, 217, 514, 221, 226, 186, 173, 230, 522, 178, 210, 517, 242, 515, 185, 162, 246, 236, 550, 262, 153, 222, 201, 621, 295, 247, 174, 161, 219, 715, 202, 209, 723, 208, 263, 206, 286, 184, 175, 551, 227, 154, 724, 156, 169, 158, 593, 182, 588, 163, 599, 164, 609, 165, 549, 446, 349, 372, 674, 342, 665, 380, 421, 365, 424, 327, 528, 300, 455, 267, 386, 228, 255, 297, 328, 394, 322, 170, 257, 415, 540, 229, 411, 534, 330, 376, 318, 423, 310, 344, 404, 239, 451, 405, 417, 607, 427, 355, 452, 357, 459, 377, 207, 224, 440, 341, 363, 214, 428, 364, 347, 460, 375, 419, 371, 343, 171, 308, 311, 366, 332, 437, 524, 666, 523, 407, 422, 223, 232, 408, 713, 425, 296,

In [20]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[705, 688, 706, 687, 686, 677, 676, 685, 684, 704, 703, 710, 701, 702]

In [21]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190822-183842'