ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190824-103403'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-24 10:34:03,912 - 20190824-103403 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 3 if partitioning_strategy == 'distance' else None
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
dem, landcover, soil, groundwater = True, True, False, True
landcover_types = [5, 14, 15, 17, 18]
seq_len = 8
seq_steps = 1

train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subgraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 586 out of 586 | elapsed:  3.3min finished
  # This is added back by InteractiveShellApp.init_path()
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 138 out of 138 | elapsed:  1.8min finished
  del sys.path[0]
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 586 out of 586 | elapsed:  2.9min finished


In [7]:
# Train model
num_epochs = 500
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.4
weight_decay = 1e-4

batch_size = 16
temp_kernel_size = 3
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0], temp_kernel_size, dropout=dropout).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction, 'temp_kernel': temp_kernel_size,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190824-103403', 'batch_size': 16, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['sum', 'minmax'], 'rdrs_vars': [4, 5], 'dropout': 0.4, 'spatial_validation': False, 'val_fraction': 0.1, 'temp_kernel': 3, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 0.0001\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 67, 586]), 'x_val_shape': torch.Size([1088, 8, 67, 586]), 'partitioning_strategy': 'distance', 'max_hops': 3, 'spatial_x_test_shape': torch.Size([730, 8, 67, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 67, 586]), 'num_epochs': 500, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 0.0001, 'landcover_types': [5, 14, 15, 17, 18], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model((data_bn):Identity()(st_gcn_networks):Mod

In [8]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [9]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [10]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	5.574478626251221
Epoch 0 mean val loss:	1.5211572647094727
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-103403.pkl
Epoch 1 mean train loss:	1.5132691860198975
Epoch 1 mean val loss:	1.5065582990646362
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-103403.pkl
Epoch 2 mean train loss:	1.5068265199661255
Epoch 2 mean val loss:	1.5142345428466797
Epoch 3 mean train loss:	1.4296983480453491
Epoch 3 mean val loss:	1.4977525472640991
Epoch 4 mean train loss:	1.3994133472442627
Epoch 4 mean val loss:	1.382477879524231
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-103403.pkl
Epoch 5 mean train loss:	1.3404569625854492
Epoch 5 mean val loss:	1.3795596361160278
Epoch 6 mean train loss:	1.2910200357437134
Epoch 6 mean val loss:	1.2374836206436157
Saved model as /home/mgauch/runoff-nn/src/../pickle/

In [11]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [12]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-24 11:46:05,059 - 20190824-103403 - predicting


In [13]:
actuals = spatial_test_dataset.data_streamflow.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.simulated_streamflow[spatial_test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1640081955821677 	MSE sim: 2503601.526411812
474 	NSE sim: -4.872805664203967 	MSE sim: 6872716.658383175
3 	NSE sim: 0.34422987228420854 	MSE sim: 20.86788935465913
5 	NSE sim: -0.10742828024876205 	MSE sim: 26.86006803795577
12 	NSE sim: 0.34470873493166265 	MSE sim: 1.9283880402148703
04214500 705 	NSE: 0.15511140009744728 	MSE: 101.84041071824454 (clipped to 0)
705 	NSE sim: 0.42069125783919503 	MSE sim: 13.868858926422897
556 	NSE sim: -0.5641848849283515 	MSE sim: 363.0113591389354
11 	NSE sim: 0.2157732396940294 	MSE sim: 9.570030614780459
04215500 706 	NSE: 0.09966746708343632 	MSE: 162.5864708193986 (clipped to 0)
706 	NSE sim: 0.34571356231962624 	MSE sim: 19.687693483299785
2 	NSE sim: -0.003743186577538138 	MSE sim: 85.1951993495487
10 	NSE sim: 0.30082783592150497 	MSE sim: 3.9851661452755076
8 	NSE sim: 0.40365329369064384 	MSE sim: 4.539684698543116
04215000 688 	NSE: -0.16476484249839451 	MSE: 111.87893939559541 (clipped to 0)
688 	NSE sim: -0.29105403916

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


253 	NSE sim: 0.33428936763007944 	MSE sim: 0.8326738794711598
505 	NSE sim: 0.29035091393864054 	MSE sim: 2.0180053484881983
280 	NSE sim: 0.39011192818891416 	MSE sim: 0.9936379571849385
651 	NSE sim: 0.2568435754329418 	MSE sim: 26.435790775422454
273 	NSE sim: 0.3367463887133948 	MSE sim: 0.5716888700243412
653 	NSE sim: 0.27204603471993916 	MSE sim: 160.37633915971966
671 	NSE sim: 0.37920983096750227 	MSE sim: 0.1927580534922405
506 	NSE sim: 0.37215835676930953 	MSE sim: 22.077535275239182
216 	NSE sim: 0.2869181361085046 	MSE sim: 121.86383228114147
648 	NSE sim: 0.2864600718395678 	MSE sim: 0.5180517510663729
04174500 707 	NSE: -0.1885026659338076 	MSE: 112.40173231088046 (clipped to 0)
707 	NSE sim: 0.43921783224284805 	MSE sim: 89.70789214075971
507 	NSE sim: 0.22959065166770143 	MSE sim: 0.010730804840984976
272 	NSE sim: 0.24570050296664891 	MSE sim: 12.491168867129215
215 	NSE sim: 0.40045396357808727 	MSE sim: 0.4808584780025475
509 	NSE sim: 0.31677366582474287 	MSE sim

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


531 	NSE sim: 0.3343524751240461 	MSE sim: 0.795618668073846
660 	NSE sim: 0.25391970601161495 	MSE sim: 0.7865060496115249
532 	NSE sim: 0.18881115563265438 	MSE sim: 0.26662752729009814
587 	NSE sim: 0.32372711548762245 	MSE sim: 3.032529474774937
589 	NSE sim: 0.24407511843334317 	MSE sim: 0.03474999616788532
592 	NSE sim: 0.30285860565371536 	MSE sim: 0.39289268949519296
601 	NSE sim: 0.359113360649692 	MSE sim: 0.20505766395921812
611 	NSE sim: 0.2074143019407345 	MSE sim: 0.14266602981340495
612 	NSE sim: 0.36177048839236947 	MSE sim: 0.23896156193930085
616 	NSE sim: 0.44396561023950454 	MSE sim: 1.1425647464096294
618 	NSE sim: 0.35914774611249856 	MSE sim: 0.580733287692084
619 	NSE sim: 0.27868448311882366 	MSE sim: 0.8735978588119675
622 	NSE sim: 0.20577386306511913 	MSE sim: 0.47096594811533976
126 	NSE sim: 0.18213081914907647 	MSE sim: 64.3292769872747
129 	NSE sim: 0.1695043270407589 	MSE sim: 4.078631978127846
539 	NSE sim: 0.19388501684887804 	MSE sim: 0.0156668432258

In [14]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.nanmedian(nses), '/ Min', np.nanmin(nses), '/ Max', np.nanmax(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.nanmedian(mses), '/ Min', np.nanmin(mses), '/ Max', np.nanmax(mses))
    
    return np.nanmedian(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.3497039605493718 / Min -0.9537912389460308 / Max 0.6860227456695958
                  Median MSE (clipped to 0) 7.681517493462582 / Min 0.00033718099623170463 / Max 92158.67849395073
Spatial test sim Median NSE (clipped to 0) -0.21335975638639448 / Min -6256.5940258251 / Max 0.539003043742434
                 Median MSE (clipped to 0) 27.490205767772757 / Min 0.2614987380820067 / Max 6872716.658383175
Stations temporal test Median NSE (clipped to 0) 0.17419770300897014 / Min -4.754155947937389 / Max 0.35508144303925915
                       Median MSE (clipped to 0) 142.73918825439526 / Min 19.992712287357318 / Max 53700.378642799165
Stations spatial test Median NSE (clipped to 0) -0.0841266751850196 / Min -12.33801852325694 / Max 0.15511140009744728
                      Median MSE (clipped to 0) 184.04925226383637 / Min 27.016850259674797 / Max 13533.437025245012


In [15]:
nse_dict

{705: 0.15511140009744728,
 706: 0.09966746708343632,
 688: -0.16476484249839451,
 685: 0.06897538106328682,
 684: -0.13353774155068487,
 687: -0.8020854764355383,
 686: 0.13606940049030758,
 677: -7.771180543273632,
 676: -0.034715608819354316,
 704: -0.026752639068796435,
 703: -0.7375304890469496,
 710: 0.10943825149959074,
 701: -12.33801852325694,
 702: -0.2301474095392455,
 718: 0.13464087542013337,
 717: 0.1626776653081834,
 716: 0.11229095962481228,
 714: 0.23059229228218558,
 709: 0.1555184542895105,
 723: 0.2389284638659629,
 715: 0.23704420356586187,
 724: 0.13291823255670288,
 713: 0.24152810248303602,
 712: 0.24138907519197217,
 707: -0.1885026659338076,
 719: 0.005207499614239275,
 720: 0.008508102405257123,
 721: 0.15145005717489346,
 689: -4.754155947937389,
 693: 0.09366903162817053,
 678: 0.35508144303925915,
 691: 0.010266478331814821,
 680: 0.32153919046683566,
 696: 0.2529710161914507,
 682: 0.19096891435785512,
 694: 0.18571774070975688,
 683: 0.20045414260004202,

In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190824-103403.pkl'

In [18]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[602, 112, 130, 132, 617, 140, 124, 131, 718, 123, 135, 608, 136, 145, 717, 144, 138, 664, 137, 142, 155, 538, 716, 141, 139, 614, 603, 533, 143, 151, 205, 199, 606, 200, 188, 180, 159, 181, 146, 714, 212, 160, 189, 166, 147, 615, 620, 148, 231, 245, 217, 178, 237, 241, 514, 187, 659, 522, 709, 274, 221, 152, 517, 226, 518, 179, 238, 516, 242, 225, 527, 173, 515, 655, 210, 218, 230, 668, 186, 191, 596, 185, 246, 184, 723, 236, 219, 295, 551, 621, 174, 206, 227, 153, 247, 550, 262, 715, 161, 154, 202, 263, 175, 162, 208, 209, 222, 201, 286, 724, 169, 156, 158, 593, 182, 588, 163, 164, 599, 165, 609, 387, 439, 425, 340, 239, 330, 232, 386, 422, 546, 442, 357, 297, 171, 390, 358, 381, 296, 347, 394, 423, 235, 319, 428, 256, 318, 399, 207, 228, 540, 332, 549, 674, 404, 362, 365, 411, 322, 395, 524, 320, 377, 415, 341, 317, 407, 195, 420, 441, 713, 306, 364, 374, 438, 455, 607, 418, 403, 436, 313, 416, 308, 327, 352, 421, 267, 170, 541, 534, 446, 311, 430, 460, 450, 342, 402, 349, 429, 523,

In [19]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[705, 706, 688, 685, 684, 687, 686, 677, 676, 704, 703, 710, 701, 702]

In [20]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190824-114726'