ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190822-162826'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-22 16:28:26,368 - 20190822-162826 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 10 if partitioning_strategy == 'distance' else None
rdrs_vars = list(range(8))
agg = ['minmax','minmax','minmax','sum','minmax','minmax','minmax','minmax']
include_month = True
dem, landcover, soil, groundwater = True, False, False, False
landcover_types = []
seq_len = 8
seq_steps = 1

train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.grid_dataset.simulated_streamflow[~pd.isna(train_dataset.grid_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  


Loading subbasin shapes
Aggregating into subbasins


  # This is added back by InteractiveShellApp.init_path()


Loading subbasin shapes
Aggregating into subbasins


  del sys.path[0]


Loading subbasin shapes
Aggregating into subbasins


In [7]:
# Train model
num_epochs = 500
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.2
weight_decay = 1e-5

batch_size = 4
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0]).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190822-162826', 'batch_size': 4, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['minmax', 'minmax', 'minmax', 'sum', 'minmax', 'minmax', 'minmax', 'minmax'], 'rdrs_vars': [0, 1, 2, 3, 4, 5, 6, 7], 'dropout': 0.2, 'spatial_validation': False, 'val_fraction': 0.1, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 84, 586]), 'x_val_shape': torch.Size([1088, 8, 84, 586]), 'partitioning_strategy': 'distance', 'max_hops': 10, 'spatial_x_test_shape': torch.Size([730, 8, 84, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 84, 586]), 'num_epochs': 500, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model(

In [8]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [9]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [10]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	6.751997947692871
Epoch 0 mean val loss:	1.4493485689163208
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-162826.pkl
Epoch 1 mean train loss:	1.49709153175354
Epoch 1 mean val loss:	1.5684771537780762
Epoch 2 mean train loss:	1.4450905323028564
Epoch 2 mean val loss:	1.5495587587356567
Epoch 3 mean train loss:	1.4571224451065063
Epoch 3 mean val loss:	1.5029678344726562
Epoch 4 mean train loss:	1.4150336980819702
Epoch 4 mean val loss:	1.5786062479019165
Epoch 5 mean train loss:	1.4248219728469849
Epoch 5 mean val loss:	1.8913803100585938
Epoch 6 mean train loss:	1.3952255249023438
Epoch 6 mean val loss:	1.4139021635055542
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-162826.pkl
Epoch 7 mean train loss:	1.4371306896209717
Epoch 7 mean val loss:	1.426726222038269
Epoch 8 mean train loss:	1.4328585863113403
Epoch 8 mean val loss:	1.603983402252197

In [11]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [12]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-22 20:23:06,896 - 20190822-162826 - predicting


In [13]:
actuals = spatial_test_dataset.grid_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.grid_dataset.simulated_streamflow[spatial_test_dataset.grid_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1767881879612556 	MSE sim: 2518387.056565134
474 	NSE sim: -1.1323870504907898 	MSE sim: 2495449.8483332796
3 	NSE sim: 0.32130265170677985 	MSE sim: 21.597478401183984
2 	NSE sim: 0.06978521637258928 	MSE sim: 78.95429327819662
5 	NSE sim: -59.76397449154092 	MSE sim: 1473.7970107940293
556 	NSE sim: -0.34917016450926774 	MSE sim: 313.1113846242315
04215500 706 	NSE: 0.1712868579288983 	MSE: 149.65308945853877 (clipped to 0)
706 	NSE sim: 0.4288981127004923 	MSE sim: 17.18464308193316
4 	NSE sim: -0.43142270728244436 	MSE sim: 24.390749115232648
04214500 705 	NSE: 0.028558839314317908 	MSE: 117.0946877543958 (clipped to 0)
705 	NSE sim: -1.2852233183948432 	MSE sim: 54.70906532494997
9 	NSE sim: -58.691241591504706 	MSE sim: 242.294025589096
8 	NSE sim: -8.7047421716147 	MSE sim: 73.87727486991596
11 	NSE sim: -2.1922139935023326 	MSE sim: 38.95504105831433
10 	NSE sim: -2.7723657161937796 	MSE sim: 21.501863077726384
04215000 688 	NSE: 0.11214881718190983 	MSE: 85.2806

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 0.4769828795803605
273 	NSE sim: 0.2903137589258131 	MSE sim: 0.6117112946350841
505 	NSE sim: 0.2890105918183842 	MSE sim: 2.0218167776304328
512 	NSE sim: -0.6289656134048556 	MSE sim: 381.824620475148
272 	NSE sim: 0.35045700993249396 	MSE sim: 10.756405389773864
503 	NSE sim: 0.3575278661980532 	MSE sim: 0.017338416891503658
253 	NSE sim: 0.37489329866753673 	MSE sim: 0.7818862982988771
656 	NSE sim: 0.3084357831645872 	MSE sim: 0.8533959432994838
519 	NSE sim: 0.23657346059719742 	MSE sim: 1.655310873040337
520 	NSE sim: 0.36335106364477265 	MSE sim: 0.15597229418295952
657 	NSE sim: 0.36588901746575964 	MSE sim: 0.025183042530750883
658 	NSE sim: 0.3440476662214299 	MSE sim: 1.983847800745448
521 	NSE sim: 0.3937908511600111 	MSE sim: 2.65032537103914
525 	NSE sim: -inf 	MSE sim: 0.0003206914238388642


  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


526 	NSE sim: 0.32761669861921916 	MSE sim: 0.00026120304371947726
660 	NSE sim: 0.31464278892368214 	MSE sim: 0.7224927357011771
531 	NSE sim: 0.3388072865113473 	MSE sim: 0.7902940315807632
532 	NSE sim: 0.3503783046170468 	MSE sim: 0.21352244611924381
587 	NSE sim: 0.23741649605573356 	MSE sim: 3.4195618450022467
589 	NSE sim: 0.3459366040360312 	MSE sim: 0.030067406243060382
592 	NSE sim: 0.38797073662868997 	MSE sim: 0.34492547033618465
601 	NSE sim: 0.43643047257161793 	MSE sim: 0.18031933212122592
611 	NSE sim: 0.17510558638814155 	MSE sim: 0.1484816232407726
612 	NSE sim: 0.356154657859008 	MSE sim: 0.24106420309178875
616 	NSE sim: 0.32624612335141256 	MSE sim: 1.3844601006550736
618 	NSE sim: 0.4043093750712795 	MSE sim: 0.5398083145743849
619 	NSE sim: 0.3937547031257971 	MSE sim: 0.7342342995116431
622 	NSE sim: 0.28502032337763816 	MSE sim: 0.4239738100072857
04208504 719 	NSE: -0.20815409332420454 	MSE: 1179.7970932039912 (clipped to 0)
719 	NSE sim: 0.07502924197125616 	

In [14]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.3250454124980033 / Min -inf / Max 0.5643296150256618
                  Median MSE (clipped to 0) 8.009289141584048 / Min 0.00026120304371947726 / Max 49541.81029849028
Spatial test sim Median NSE (clipped to 0) -0.36472592384766966 / Min -1896.4926897064734 / Max 0.4288981127004923
                 Median MSE (clipped to 0) 48.96435563286653 / Min 0.3785422012085791 / Max 2518387.056565134
Stations temporal test Median NSE (clipped to 0) 0.12306254489620827 / Min -1.2073332048906846 / Max 0.23837178028904382
                       Median MSE (clipped to 0) 147.97567360667233 / Min 16.187053557258018 / Max 97229.25372018827
Stations spatial test Median NSE (clipped to 0) -0.13933320429740204 / Min -8.906380591159545 / Max 0.20179890817289425
                      Median MSE (clipped to 0) 169.1709392988734 / Min 62.67039156022089 / Max 14517.978461061173


In [15]:
nse_dict

{706: 0.1712868579288983,
 705: 0.028558839314317908,
 688: 0.11214881718190983,
 684: -0.16042183931074327,
 687: -0.9331850499676491,
 685: -0.48456630115498434,
 686: -8.906380591159545,
 676: -0.1182445692840608,
 677: -0.9689704571320985,
 704: 0.18801618314793267,
 703: -1.0805974187078382,
 710: -0.2646479533745012,
 701: -0.05998923919063848,
 702: 0.20179890817289425,
 718: 0.09468558257289506,
 717: 0.14610467507986635,
 716: 0.09000923935782257,
 714: 0.19840342033830383,
 709: 0.21903868056403875,
 715: -0.24594831577975285,
 723: 0.17940047343967358,
 724: 0.13938792496630137,
 713: -0.3732800108141936,
 712: 0.16911892628456482,
 707: -0.14412846880512342,
 719: -0.20815409332420454,
 720: 0.03271336742616193,
 721: 0.026946616127469847,
 690: -0.15619885503967357,
 682: 0.105509028238991,
 697: 0.17344418567979025,
 692: 0.14832717749102609,
 696: 0.19889803037717857,
 680: 0.23837178028904382,
 695: 0.1891886671778218,
 683: 0.12646399850679446,
 681: 0.1346627466640368

In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.grid_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190822-162826.pkl'

In [18]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[602, 112, 124, 131, 718, 130, 617, 123, 140, 132, 717, 136, 608, 144, 145, 135, 138, 137, 538, 141, 716, 155, 664, 142, 614, 139, 603, 533, 143, 151, 606, 160, 181, 180, 212, 189, 146, 714, 159, 205, 188, 199, 200, 615, 147, 166, 620, 148, 231, 274, 152, 659, 178, 186, 514, 245, 173, 668, 226, 515, 230, 217, 218, 191, 709, 242, 655, 241, 517, 237, 179, 210, 225, 596, 516, 238, 522, 527, 518, 187, 221, 222, 174, 184, 162, 247, 295, 154, 551, 227, 206, 185, 201, 153, 262, 209, 208, 175, 236, 715, 202, 621, 723, 550, 161, 724, 286, 263, 219, 246, 156, 169, 593, 158, 182, 588, 163, 164, 599, 165, 609, 355, 362, 311, 673, 430, 426, 415, 437, 329, 234, 310, 352, 387, 266, 381, 424, 405, 365, 523, 390, 316, 357, 239, 353, 363, 407, 425, 334, 393, 461, 414, 524, 440, 417, 666, 422, 467, 394, 548, 382, 452, 232, 347, 549, 356, 429, 540, 332, 375, 330, 420, 213, 358, 342, 528, 459, 402, 313, 235, 542, 674, 321, 442, 451, 403, 427, 436, 419, 661, 408, 328, 233, 297, 376, 341, 409, 534, 320, 455,

In [19]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[706, 705, 688, 684, 687, 685, 686, 676, 677, 704, 703, 710, 701, 702]

In [20]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190822-202459'