ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190822-165958'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-22 16:59:58,938 - 20190822-165958 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 10 if partitioning_strategy == 'distance' else None
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
dem, landcover, soil, groundwater = True, False, False, False
landcover_types = []
seq_len = 8
seq_steps = 1

train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.grid_dataset.simulated_streamflow[~pd.isna(train_dataset.grid_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  


Loading subbasin shapes
Aggregating into subbasins


  # This is added back by InteractiveShellApp.init_path()


Loading subbasin shapes
Aggregating into subbasins


  del sys.path[0]


Loading subbasin shapes
Aggregating into subbasins


In [7]:
# Train model
num_epochs = 500
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.2
weight_decay = 1e-5

batch_size = 16
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0]).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190822-165958', 'batch_size': 16, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['sum', 'minmax'], 'rdrs_vars': [4, 5], 'dropout': 0.2, 'spatial_validation': False, 'val_fraction': 0.1, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 48, 586]), 'x_val_shape': torch.Size([1088, 8, 48, 586]), 'partitioning_strategy': 'distance', 'max_hops': 10, 'spatial_x_test_shape': torch.Size([730, 8, 48, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 48, 586]), 'num_epochs': 500, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model((st_gcn_networks):ModuleList((0):st_gcn((gcn):ConvTemporalGraphical((conv):Co

In [8]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [9]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [10]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	15.948393821716309
Epoch 0 mean val loss:	1.527636170387268
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-165958.pkl
Epoch 1 mean train loss:	1.4579609632492065
Epoch 1 mean val loss:	1.4805171489715576
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-165958.pkl
Epoch 2 mean train loss:	1.457152247428894
Epoch 2 mean val loss:	1.502930998802185
Epoch 3 mean train loss:	1.393175482749939
Epoch 3 mean val loss:	1.4247645139694214
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-165958.pkl
Epoch 4 mean train loss:	1.382936716079712
Epoch 4 mean val loss:	1.4012304544448853
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-165958.pkl
Epoch 5 mean train loss:	1.371720552444458
Epoch 5 mean val loss:	1.4650728702545166
Epoch 6 mean train loss:	1

In [11]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [12]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-22 19:35:23,606 - 20190822-165958 - predicting


In [13]:
actuals = spatial_test_dataset.grid_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.grid_dataset.simulated_streamflow[spatial_test_dataset.grid_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.141455908758859 	MSE sim: 2477510.1558568478
474 	NSE sim: -1.1482526133707234 	MSE sim: 2514016.701134857
12 	NSE sim: 0.44896071959116135 	MSE sim: 1.6215957920913189
10 	NSE sim: -0.18349013839074924 	MSE sim: 6.745698806528481
4 	NSE sim: -12.60374469811494 	MSE sim: 231.80121586119762
11 	NSE sim: -0.47193188892488025 	MSE sim: 17.962194039880547
8 	NSE sim: 0.1300486870415145 	MSE sim: 6.622497654688847
556 	NSE sim: -0.10629110750823756 	MSE sim: 256.74473804819996
9 	NSE sim: 0.36326401342401526 	MSE sim: 2.5845889834346614
2 	NSE sim: -8.193658065881726 	MSE sim: 780.3345936972637
04215500 706 	NSE: 0.004832564247214122 	MSE: 179.71222335962239 (clipped to 0)
706 	NSE sim: 0.09659924647198448 	MSE sim: 27.18362494429424
5 	NSE sim: -48.439463561105896 	MSE sim: 1199.127183850716
3 	NSE sim: -0.5060706898513154 	MSE sim: 47.92611799135414
04214500 705 	NSE: -6.547928199952506 	MSE: 909.8052785226851 (clipped to 0)
705 	NSE sim: -39.89289073447498 	MSE sim: 978.99

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


279 	NSE sim: 0.31066154979463534 	MSE sim: 4.896578397604699
583 	NSE sim: 0.2865775906696334 	MSE sim: 184.66640719547908
513 	NSE sim: 0.40680235036940804 	MSE sim: 130.32415993105548
509 	NSE sim: 0.2582095293066158 	MSE sim: 42.685303065837864
505 	NSE sim: 0.32229387702975576 	MSE sim: 1.927170213728569
503 	NSE sim: 0.3294365953370536 	MSE sim: 0.018096516954642435
254 	NSE sim: 0.2642428334342637 	MSE sim: 49.84889879072211
244 	NSE sim: 0.14098753511969786 	MSE sim: 4.00123182741066
653 	NSE sim: 0.3765160141236892 	MSE sim: 137.36044303444905
216 	NSE sim: 0.3663284201293888 	MSE sim: 108.29282168145055
647 	NSE sim: 0.341496540396902 	MSE sim: 0.023016330018630474
649 	NSE sim: -0.33690873450125713 	MSE sim: 34.069554652307176
273 	NSE sim: 0.3515747923300726 	MSE sim: 0.5589075852131828
512 	NSE sim: 0.4077696743955702 	MSE sim: 138.81699984761056
04174500 707 	NSE: -0.1961240357540639 	MSE: 113.12251754336927 (clipped to 0)
707 	NSE sim: 0.3782179054407506 	MSE sim: 99.466

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: -inf 	MSE sim: 0.0007589600520254444
526 	NSE sim: 0.17666341495410265 	MSE sim: 0.00031984438277683823
660 	NSE sim: 0.2135437869091955 	MSE sim: 0.8290697051437174
531 	NSE sim: 0.1261937450458982 	MSE sim: 1.0444214734378143
532 	NSE sim: 0.047139893337830086 	MSE sim: 0.31319308180434424
587 	NSE sim: 0.23327929624973154 	MSE sim: 3.438113794432809
589 	NSE sim: 0.3191896246537357 	MSE sim: 0.03129696946250485
592 	NSE sim: 0.28785139841192253 	MSE sim: 0.4013504027551676
601 	NSE sim: 0.22285956046480015 	MSE sim: 0.24865333947494392
611 	NSE sim: 0.13533535199383795 	MSE sim: 0.1556402957473254
612 	NSE sim: 0.321871976043702 	MSE sim: 0.25390009213336284
616 	NSE sim: 0.33161998071075405 	MSE sim: 1.3734176542091583
618 	NSE sim: 0.2529685199185726 	MSE sim: 0.6769517385723777
619 	NSE sim: 0.23156656765500372 	MSE sim: 0.9306631916622202
622 	NSE sim: 0.2469887563480897 	MSE sim: 0.4465260431703854
116 	NSE sim: 0.15280595267700348 	MSE sim: 1.3357513900311029
042

In [14]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.3711084516257794 / Min -inf / Max 0.6372982501848131
                  Median MSE (clipped to 0) 7.28346181934098 / Min 0.00031984438277683823 / Max 91795.28224478338
Spatial test sim Median NSE (clipped to 0) -0.3908105101087337 / Min -906.6410667008081 / Max 0.5330437024564105
                 Median MSE (clipped to 0) 32.168309559720434 / Min 0.43866160006228877 / Max 2514016.701134857
Stations temporal test Median NSE (clipped to 0) 0.19299350104916096 / Min -1.9150608397492679 / Max 0.40638064257802387
                       Median MSE (clipped to 0) 138.38365213356118 / Min 19.518355433483084 / Max 42028.69528536788
Stations spatial test Median NSE (clipped to 0) -0.1778034911144416 / Min -6.547928199952506 / Max 0.1918258945243274
                      Median MSE (clipped to 0) 173.68754542662384 / Min 41.091637854257975 / Max 13536.860637066755


In [15]:
nse_dict

{706: 0.004832564247214122,
 705: -6.547928199952506,
 688: 0.18686841813545652,
 687: -0.8025413577559606,
 677: -1.1388291202977334,
 685: -0.007555821465756507,
 684: -0.26124432535549125,
 676: -0.09436265687339196,
 686: -1.3305154778261543,
 704: -0.3021025770967063,
 703: -0.3642033107116125,
 710: 0.1918258945243274,
 702: 0.008449848799528703,
 701: -0.028428418954447032,
 718: 0.1899221383589289,
 717: 0.21180452182946774,
 716: 0.16838160249792078,
 714: 0.29949088768836174,
 709: 0.18577860627001774,
 715: -0.23870166998561393,
 724: 0.20802434832536154,
 723: 0.25999713740893715,
 713: 0.40638064257802387,
 712: 0.22861849096050602,
 707: -0.1961240357540639,
 720: 0.0744762815457165,
 719: -0.03900396464973377,
 721: 0.22375007890911325,
 693: 0.1341966175755035,
 694: 0.19606486373939302,
 696: 0.31503389946559235,
 680: 0.37716204150018384,
 678: 0.2912112187973791,
 689: -1.9150608397492679,
 697: 0.26782327309033627,
 691: -0.07784419333607895,
 698: 0.248843245882743

In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.grid_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190822-165958.pkl'

In [18]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[112, 602, 123, 131, 124, 617, 140, 132, 130, 718, 717, 608, 144, 136, 135, 145, 664, 716, 142, 137, 141, 538, 138, 155, 139, 614, 151, 143, 603, 533, 212, 199, 146, 188, 714, 180, 189, 181, 606, 160, 205, 200, 159, 166, 615, 147, 148, 620, 218, 518, 596, 527, 668, 274, 515, 152, 187, 225, 517, 179, 241, 217, 238, 178, 514, 226, 173, 709, 237, 221, 516, 186, 245, 191, 522, 242, 210, 230, 231, 659, 655, 715, 209, 295, 262, 724, 222, 227, 175, 206, 161, 154, 247, 202, 263, 162, 246, 219, 550, 621, 174, 153, 208, 551, 723, 185, 236, 286, 184, 201, 169, 156, 593, 182, 158, 588, 163, 164, 599, 609, 165, 342, 359, 316, 223, 213, 357, 418, 232, 343, 661, 665, 374, 329, 713, 421, 459, 548, 674, 341, 380, 546, 240, 256, 541, 402, 331, 424, 353, 399, 407, 308, 528, 416, 198, 390, 344, 299, 364, 313, 440, 377, 452, 361, 296, 437, 322, 386, 425, 426, 435, 328, 349, 451, 336, 439, 666, 356, 311, 420, 540, 306, 534, 428, 461, 348, 318, 467, 228, 436, 371, 405, 229, 334, 417, 673, 300, 170, 319, 330,

In [19]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[706, 705, 688, 687, 677, 685, 684, 676, 686, 704, 703, 710, 702, 701]

In [20]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190822-193713'