ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190824-090529'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-24 09:05:29,998 - 20190824-090529 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 3 if partitioning_strategy == 'distance' else None
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
dem, landcover, soil, groundwater = False, False, False, False
landcover_types = []
seq_len = 8
seq_steps = 1

train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subgraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 586 out of 586 | elapsed:  2.6min finished
  # This is added back by InteractiveShellApp.init_path()
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 138 out of 138 | elapsed:  1.1min finished
  del sys.path[0]
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 586 out of 586 | elapsed:  1.8min finished


In [7]:
# Train model
num_epochs = 500
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.5
weight_decay = 1e-4

batch_size = 32
temp_kernel_size = 3
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0], temp_kernel_size, dropout=dropout).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction, 'temp_kernel': temp_kernel_size,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190824-090529', 'batch_size': 32, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['sum', 'minmax'], 'rdrs_vars': [4, 5], 'dropout': 0.5, 'spatial_validation': False, 'val_fraction': 0.1, 'temp_kernel': 3, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 0.0001\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 46, 586]), 'x_val_shape': torch.Size([1088, 8, 46, 586]), 'partitioning_strategy': 'distance', 'max_hops': 3, 'spatial_x_test_shape': torch.Size([730, 8, 46, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 46, 586]), 'num_epochs': 500, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 0.0001, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model((data_bn):Identity()(st_gcn_networks):ModuleList((0):st_gc

In [8]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [9]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [10]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	10.930386543273926
Epoch 0 mean val loss:	1.7219432592391968
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-090529.pkl
Epoch 1 mean train loss:	2.2025773525238037
Epoch 1 mean val loss:	1.549200177192688
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-090529.pkl
Epoch 2 mean train loss:	1.8066591024398804
Epoch 2 mean val loss:	1.5977925062179565
Epoch 3 mean train loss:	1.6462074518203735
Epoch 3 mean val loss:	1.4576125144958496
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-090529.pkl
Epoch 4 mean train loss:	1.5615017414093018
Epoch 4 mean val loss:	1.4758143424987793
Epoch 5 mean train loss:	1.5347930192947388
Epoch 5 mean val loss:	1.5395427942276
Epoch 6 mean train loss:	1.5012174844741821
Epoch 6 mean val loss:	1.4574966430664062
Epoch 7 mean train loss:	1.4864284992218018
Epoch 7 me

In [11]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [12]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-24 10:07:32,079 - 20190824-090529 - predicting


In [13]:
actuals = spatial_test_dataset.data_streamflow.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.simulated_streamflow[spatial_test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1762629445911443 	MSE sim: 2517779.3878391865
474 	NSE sim: 0.2863826312810952 	MSE sim: 835118.7248711331
3 	NSE sim: -0.3364660587379986 	MSE sim: 42.528966571177854
4 	NSE sim: 0.1455951181385965 	MSE sim: 14.558645053127186
11 	NSE sim: 0.1014539520290304 	MSE sim: 10.965085129863684
5 	NSE sim: -0.656972398920034 	MSE sim: 40.188960464337626
04215500 706 	NSE: -0.08745511021188768 	MSE: 196.37798488867446 (clipped to 0)
706 	NSE sim: -0.2502399833703972 	MSE sim: 37.620131116314816
04214500 705 	NSE: -0.22547298391156545 	MSE: 147.71494374531852 (clipped to 0)
705 	NSE sim: -0.5872934243461703 	MSE sim: 38.00037350547198
04215000 688 	NSE: 0.11853532202969363 	MSE: 84.66716172035719 (clipped to 0)
688 	NSE sim: -0.011406548540834338 	MSE sim: 8.142177016510345
556 	NSE sim: 0.33483295060990337 	MSE sim: 154.3699833569193
9 	NSE sim: 0.04083379849411084 	MSE sim: 3.8933725279545746
10 	NSE sim: 0.4967477667466167 	MSE sim: 2.868454817761476
8 	NSE sim: 0.479070858289

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


04174500 707 	NSE: -0.14292160218574756 	MSE: 108.09093800413892 (clipped to 0)
707 	NSE sim: 0.36192453826072746 	MSE sim: 102.07244094138208
215 	NSE sim: 0.17027395818049196 	MSE sim: 0.6654715024212683
505 	NSE sim: 0.009227444140392116 	MSE sim: 2.817426748136663
253 	NSE sim: 0.3773638925896876 	MSE sim: 0.7787960682113214
506 	NSE sim: 0.3232732132074573 	MSE sim: 23.796541163202185
507 	NSE sim: 0.30769349847858496 	MSE sim: 0.009642933297801883
647 	NSE sim: 0.27798232065201733 	MSE sim: 0.025236309612063735
272 	NSE sim: 0.23795017665588247 	MSE sim: 12.619513954331172
254 	NSE sim: 0.31097072011506866 	MSE sim: 46.682998681684346
653 	NSE sim: 0.21921873294728933 	MSE sim: 172.01478014647063
648 	NSE sim: 0.09913995187827229 	MSE sim: 0.6540518714886366
583 	NSE sim: -0.5327747364386284 	MSE sim: 396.752330619664
508 	NSE sim: 0.31546533801340804 	MSE sim: 1.7839192392607253
243 	NSE sim: 0.38856279967412655 	MSE sim: 53.74369954136656
513 	NSE sim: 0.4249433941402999 	MSE s

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: nan 	MSE sim: 0.0003640947791436109
526 	NSE sim: 0.17464394616987355 	MSE sim: 0.00032062889273128516
531 	NSE sim: 0.27384090444030496 	MSE sim: 0.8679454378299963
660 	NSE sim: 0.2552959052334226 	MSE sim: 0.7850552821510467
532 	NSE sim: 0.31313645146610847 	MSE sim: 0.2257633728606364
587 	NSE sim: 0.16624494125873968 	MSE sim: 3.738707921431241
589 	NSE sim: 0.10904136988659163 	MSE sim: 0.04095752069705513
592 	NSE sim: 0.10958335032573752 	MSE sim: 0.5018181320159109
601 	NSE sim: 0.2934697970919671 	MSE sim: 0.2260609350068892
611 	NSE sim: 0.051595729555235614 	MSE sim: 0.1707134916183107
612 	NSE sim: 0.20936177826250824 	MSE sim: 0.2960253967564158
616 	NSE sim: 0.24680107507823046 	MSE sim: 1.5477073981340077
618 	NSE sim: 0.29630493189644747 	MSE sim: 0.6376807570754354
619 	NSE sim: 0.25449295551232265 	MSE sim: 0.9028966416938438
622 	NSE sim: 0.18253408005607707 	MSE sim: 0.4847468424096126
109 	NSE sim: 0.12497168686188442 	MSE sim: 271.85698735302174
54

In [14]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.nanmedian(nses), '/ Min', np.nanmin(nses), '/ Max', np.nanmax(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.nanmedian(mses), '/ Min', np.nanmin(mses), '/ Max', np.nanmax(mses))
    
    return np.nanmedian(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.29298552065397016 / Min -1.1813858142290363 / Max 0.5913098927985324
                  Median MSE (clipped to 0) 8.249121642920503 / Min 0.00032062889273128516 / Max 92235.4126223418
Spatial test sim Median NSE (clipped to 0) -0.24268326896867976 / Min -521.7687992395479 / Max 0.4967477667466167
                 Median MSE (clipped to 0) 38.5371796555967 / Min 0.1388002909615098 / Max 2517779.3878391865
Stations temporal test Median NSE (clipped to 0) 0.1043872380521248 / Min -2.1191413103824686 / Max 0.31420803597824487
                       Median MSE (clipped to 0) 171.8668903808483 / Min 10.715119438013366 / Max 48554.58489460181
Stations spatial test Median NSE (clipped to 0) -0.22076614872238498 / Min -1.3820322159983491 / Max 0.1454461607530324
                      Median MSE (clipped to 0) 195.62425283762167 / Min 26.72362007650652 / Max 14282.61359564434


In [15]:
nse_dict

{706: -0.08745511021188768,
 705: -0.22547298391156545,
 688: 0.11853532202969363,
 684: -0.25977910629620604,
 676: -0.18434634932010363,
 687: -0.901844334017984,
 685: -0.2160593135332045,
 677: -0.9253128736274159,
 686: 0.1454461607530324,
 704: 0.011037771884186864,
 703: -1.3820322159983491,
 710: -0.16394399365459966,
 702: -0.2906634926708782,
 701: -0.29419589163587734,
 718: 0.1267094885446326,
 717: 0.14334940456630996,
 716: 0.15713039522517924,
 714: 0.22088038085411843,
 709: 0.10789141055642515,
 724: 0.1126009836678541,
 715: 0.19071374889984616,
 723: 0.2095177587470678,
 713: 0.31420803597824487,
 712: 0.12789004798287007,
 707: -0.14292160218574756,
 719: 0.041299400734141734,
 720: -0.05881623094455701,
 721: 0.12057891802884346,
 697: 0.15515595040244134,
 692: -0.42741498053147153,
 678: 0.06416465830660012,
 696: -0.4582392723532269,
 694: -0.060616675398430075,
 682: 0.1609281628434881,
 689: -0.4611577608138204,
 681: 0.2708910209603408,
 698: -0.1593074151872

In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190824-090529.pkl'

In [18]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[112, 602, 130, 123, 617, 124, 131, 718, 132, 140, 608, 144, 145, 136, 717, 135, 138, 538, 141, 664, 137, 716, 142, 155, 614, 139, 151, 603, 143, 533, 212, 205, 159, 606, 188, 181, 714, 146, 189, 199, 160, 200, 180, 615, 147, 166, 148, 620, 230, 231, 173, 516, 518, 221, 522, 191, 709, 514, 225, 217, 517, 152, 515, 178, 238, 241, 242, 218, 237, 274, 226, 655, 596, 527, 659, 179, 245, 186, 210, 668, 187, 161, 219, 222, 286, 201, 209, 551, 621, 208, 724, 184, 247, 263, 715, 174, 175, 550, 246, 262, 295, 153, 202, 206, 236, 227, 162, 723, 185, 154, 169, 156, 593, 182, 158, 163, 588, 164, 599, 165, 609, 665, 429, 408, 393, 364, 451, 452, 403, 387, 232, 353, 300, 330, 359, 341, 363, 328, 375, 661, 332, 316, 427, 674, 666, 233, 377, 349, 409, 366, 419, 395, 404, 357, 329, 322, 416, 358, 339, 355, 376, 348, 443, 362, 461, 255, 459, 417, 347, 301, 430, 523, 529, 428, 534, 257, 195, 240, 297, 549, 299, 380, 426, 422, 344, 356, 455, 372, 319, 198, 425, 170, 441, 468, 374, 340, 365, 266, 223, 390,

In [19]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[706, 705, 688, 684, 676, 687, 685, 677, 686, 704, 703, 710, 702, 701]

In [20]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190824-100913'