ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190822-085928'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-22 08:59:28,913 - 20190822-085928 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
seq_len = 8
seq_steps = 1
train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
if not spatial_val:
    val_fraction = 0.1

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins)))), 0).float().to(device)
val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins)))), 0).float().to(device)
test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins)))), 0).float().to(device)

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
rdrs_vars = [4,5]
agg = ['sum', 'minmax']
include_month = True
dem, landcover, soil, groundwater = True, True, False, False
landcover_types = None

In [7]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.grid_dataset.simulated_streamflow[~pd.isna(train_dataset.grid_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  
  # This is added back by InteractiveShellApp.init_path()
  del sys.path[0]


In [8]:
# Train model
num_epochs = 600
learning_rate = 2e-3
patience = 200
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.3
weight_decay = 1e-5

batch_size = 4
model = stgcn.Model(train_dataset.x.shape[2]).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190822-085928', 'batch_size': 4, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['sum', 'minmax'], 'rdrs_vars': [4, 5], 'dropout': 0.3, 'spatial_validation': False, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 200, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 90, 586]), 'x_val_shape': torch.Size([1088, 8, 90, 586]), 'spatial_x_test_shape': torch.Size([730, 8, 90, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 90, 586]), 'num_epochs': 600, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'landcover_types': None, 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model((st_gcn_networks):ModuleList((0):st_gcn((gcn):ConvTemporalGraphical((conv):Conv2d(90,64,kernel_size=(1,1),stride=(1,1)))(tcn):Sequential((0):BatchNorm

In [9]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [10]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [11]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	3.116175413131714
Epoch 0 mean val loss:	1.4640263319015503
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-085928.pkl
Epoch 1 mean train loss:	1.4446171522140503
Epoch 1 mean val loss:	1.522050380706787
Epoch 2 mean train loss:	1.430527925491333
Epoch 2 mean val loss:	1.6130850315093994
Epoch 3 mean train loss:	1.4450246095657349
Epoch 3 mean val loss:	1.3870110511779785
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-085928.pkl
Epoch 4 mean train loss:	1.3126039505004883
Epoch 4 mean val loss:	1.4583731889724731
Epoch 5 mean train loss:	1.2280635833740234
Epoch 5 mean val loss:	1.5492020845413208
Epoch 6 mean train loss:	1.1769514083862305
Epoch 6 mean val loss:	1.18733549118042
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-085928.pkl
Epoch 7 mean train loss:	1.1877120733261108
Epoch 7 mea

In [12]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [13]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-22 11:33:26,505 - 20190822-085928 - predicting


In [14]:
actuals = spatial_test_dataset.grid_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.grid_dataset.simulated_streamflow[spatial_test_dataset.grid_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1781387412614062 	MSE sim: 2519949.5494016493
474 	NSE sim: -1.137265896185082 	MSE sim: 2501159.3721953104
5 	NSE sim: -5.84362608381282 	MSE sim: 165.98841253742367
04215000 688 	NSE: -0.09984822682712746 	MSE: 105.64351586162755 (clipped to 0)
688 	NSE sim: -9.731384332535256 	MSE sim: 86.39140313434508
2 	NSE sim: -1.0435798620464931 	MSE sim: 173.45392333612114
10 	NSE sim: -8.04813876686421 	MSE sim: 51.572900272691506
11 	NSE sim: 0.02461483938038922 	MSE sim: 11.902763742326691
04214500 705 	NSE: -0.33286482539343343 	MSE: 160.65964349101694 (clipped to 0)
705 	NSE sim: -0.9410616198923631 	MSE sim: 46.46971090643153
12 	NSE sim: -0.3987964332220193 	MSE sim: 4.116371537837084
3 	NSE sim: -2.526526448051879 	MSE sim: 112.22097593948324
9 	NSE sim: -1.1403916521598894 	MSE sim: 8.688110615761152
4 	NSE sim: -3.3861709644478033 	MSE sim: 74.73822723789934
04215500 706 	NSE: -0.2827972170437454 	MSE: 231.65382197226123 (clipped to 0)
706 	NSE sim: -1.019890810208472

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


04174500 707 	NSE: -2.525466549814595 	MSE: 333.4183075574971 (clipped to 0)
707 	NSE sim: -0.6844921477659855 	MSE sim: 269.4669135220919
506 	NSE sim: 0.3702646018083311 	MSE sim: 22.14412761170562
653 	NSE sim: -0.731140634848034 	MSE sim: 381.3894982778869
671 	NSE sim: -0.33626985847741264 	MSE sim: 0.4149176158216093
280 	NSE sim: 0.28162069774756837 	MSE sim: 1.1703933481669264
512 	NSE sim: 0.4544678312514625 	MSE sim: 127.87109290417256
509 	NSE sim: 0.33962415547969294 	MSE sim: 38.00041140237222
279 	NSE sim: -0.7206600692914005 	MSE sim: 12.222366128573324
254 	NSE sim: 0.43806042887439967 	MSE sim: 38.072437592816975
511 	NSE sim: 0.31385639819801625 	MSE sim: 0.49978791086652696
243 	NSE sim: 0.47181737643713595 	MSE sim: 46.425844238139796
651 	NSE sim: 0.44515097864984454 	MSE sim: 19.737261437127053
505 	NSE sim: 0.20960710696344453 	MSE sim: 2.247613809252362
502 	NSE sim: 0.23087007795612335 	MSE sim: 0.08018375370177257
183 	NSE sim: 0.4181023774387228 	MSE sim: 137

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: -inf 	MSE sim: 0.010746239485222908
526 	NSE sim: 0.3395320255881501 	MSE sim: 0.00025657425584683653
660 	NSE sim: -0.4633220097678925 	MSE sim: 1.5426109260433836
531 	NSE sim: 0.4015341682859218 	MSE sim: 0.7153193997150277
532 	NSE sim: 0.3153429726140432 	MSE sim: 0.22503811722913675
587 	NSE sim: 0.02480378163988739 	MSE sim: 4.37295556807434
589 	NSE sim: 0.11113027266998421 	MSE sim: 0.04086149347862632
592 	NSE sim: -0.4488322520703776 	MSE sim: 0.8165281889150838
601 	NSE sim: -0.32807718189138524 	MSE sim: 0.42493069406512074
611 	NSE sim: -0.8918167579012566 	MSE sim: 0.3405284584936558
612 	NSE sim: -0.4218979125444289 	MSE sim: 0.5323773656971487
616 	NSE sim: -0.607375505601309 	MSE sim: 3.3029082746724914
618 	NSE sim: 0.2437322162538731 	MSE sim: 0.6853215757084502
619 	NSE sim: 0.23821076612155356 	MSE sim: 0.9226162865034395
622 	NSE sim: 0.21475119384028662 	MSE sim: 0.4656425057058658
108 	NSE sim: -1.3284394322509359 	MSE sim: 19.83923566720145
101 	

In [15]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.13783214790023773 / Min -inf / Max 0.5686901554310124
                  Median MSE (clipped to 0) 12.319852195548648 / Min 0.00025657425584683653 / Max 90905.56845423472
Spatial test sim Median NSE (clipped to 0) -0.6978719497520052 / Min -1045.9710055938533 / Max 0.36318041424937775
                 Median MSE (clipped to 0) 55.2235288932413 / Min 0.14797395157725243 / Max 2519949.5494016493
Stations temporal test Median NSE (clipped to 0) -0.18472548530348842 / Min -2.525466549814595 / Max 0.15972097317629508
                       Median MSE (clipped to 0) 320.5851475572937 / Min 14.859790658508915 / Max 100869.59212400897
Stations spatial test Median NSE (clipped to 0) -0.41293025228527225 / Min -2.3867053802839155 / Max -0.09984822682712746
                      Median MSE (clipped to 0) 216.5852719801101 / Min 47.20407839483999 / Max 14940.88795874076


In [16]:
nse_dict

{688: -0.09984822682712746,
 705: -0.33286482539343343,
 706: -0.2827972170437454,
 677: -0.37855472736520857,
 676: -0.22473934513055993,
 686: -0.5094671419867991,
 685: -0.45268369366134986,
 684: -0.31648949254637704,
 687: -0.9894988350167382,
 704: -0.46890856134262693,
 703: -2.3867053802839155,
 710: -0.3488427242390084,
 702: -0.5286431632195705,
 701: -0.44730577720533593,
 718: 0.06308161278871682,
 717: -0.17026835688117603,
 716: 0.03153089288765831,
 714: -0.19471665862741072,
 709: -0.5965222389389684,
 723: 0.11846377918921891,
 724: -0.16713854108002368,
 715: -0.24771508569094736,
 713: -0.4246966757713597,
 712: 0.11903930664755702,
 707: -2.525466549814595,
 719: -0.004818275296760044,
 720: 0.011598446548140129,
 721: -0.6852279678324202,
 695: -0.18434651274235447,
 681: 0.05196814811544881,
 683: 0.040197838542335385,
 692: -0.9128930527457966,
 691: -2.1895957974481246,
 693: -0.977248535076715,
 697: -0.18510445786462237,
 678: -0.17901275442626274,
 696: -0.45

In [17]:
writer.close()

In [18]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.grid_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190822-085928.pkl'

In [19]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[602, 112, 132, 131, 123, 130, 617, 140, 718, 124, 717, 144, 608, 145, 135, 136, 138, 664, 538, 142, 141, 716, 155, 137, 614, 139, 603, 533, 151, 143, 160, 714, 212, 180, 606, 199, 200, 159, 188, 205, 181, 146, 189, 615, 147, 166, 620, 148, 237, 178, 218, 596, 152, 226, 659, 186, 230, 245, 518, 221, 515, 217, 517, 655, 274, 522, 516, 225, 238, 242, 210, 527, 709, 231, 179, 668, 173, 241, 514, 187, 191, 162, 227, 286, 236, 175, 723, 295, 202, 724, 174, 550, 621, 206, 222, 247, 551, 263, 715, 161, 262, 219, 184, 153, 185, 209, 246, 154, 201, 208, 169, 156, 182, 158, 593, 588, 163, 599, 164, 609, 165, 317, 332, 403, 306, 546, 362, 540, 233, 429, 234, 330, 420, 229, 414, 328, 446, 239, 713, 359, 171, 390, 341, 386, 381, 417, 529, 321, 438, 461, 352, 255, 455, 267, 353, 666, 421, 452, 375, 214, 339, 319, 349, 405, 404, 674, 356, 411, 310, 528, 376, 342, 327, 394, 301, 300, 358, 343, 296, 344, 549, 542, 374, 450, 322, 355, 439, 427, 408, 364, 371, 347, 329, 661, 377, 357, 425, 311, 361, 380,

In [20]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[688, 705, 706, 677, 676, 686, 685, 684, 687, 704, 703, 710, 702, 701]

In [21]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190822-113512'