ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190822-154141'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-22 15:41:41,904 - 20190822-154141 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 10 if partitioning_strategy == 'distance' else None
seq_len = 8
seq_steps = 1
train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
rdrs_vars = [4,5]
agg = ['sum', 'minmax']
include_month = True
dem, landcover, soil, groundwater = True, True, False, False
landcover_types = None

In [7]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.grid_dataset.simulated_streamflow[~pd.isna(train_dataset.grid_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  


Loading subbasin shapes
Aggregating into subbasins


  # This is added back by InteractiveShellApp.init_path()


Loading subbasin shapes
Aggregating into subbasins


  del sys.path[0]


Loading subbasin shapes
Aggregating into subbasins


In [8]:
# Train model
num_epochs = 500
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.2
weight_decay = 1e-5

batch_size = 4
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0]).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190822-154141', 'batch_size': 4, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['sum', 'minmax'], 'rdrs_vars': [4, 5], 'dropout': 0.2, 'spatial_validation': False, 'val_fraction': 0.1, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 90, 586]), 'x_val_shape': torch.Size([1088, 8, 90, 586]), 'partitioning_strategy': 'distance', 'max_hops': 10, 'spatial_x_test_shape': torch.Size([730, 8, 90, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 90, 586]), 'num_epochs': 500, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'landcover_types': None, 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model((st_gcn_networks):ModuleList((0):st_gcn((gcn):ConvTemporalGraphical((conv):C

In [9]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [10]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [11]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	19.281818389892578
Epoch 0 mean val loss:	1.431162714958191
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-154141.pkl
Epoch 1 mean train loss:	1.4437609910964966
Epoch 1 mean val loss:	1.5627179145812988
Epoch 2 mean train loss:	1.6598193645477295
Epoch 2 mean val loss:	1.607287049293518
Epoch 3 mean train loss:	1.4336282014846802
Epoch 3 mean val loss:	1.358628273010254
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-154141.pkl
Epoch 4 mean train loss:	1.2962325811386108
Epoch 4 mean val loss:	1.3991979360580444
Epoch 5 mean train loss:	1.2519311904907227
Epoch 5 mean val loss:	2.1357080936431885
Epoch 6 mean train loss:	1.1886861324310303
Epoch 6 mean val loss:	1.0849992036819458
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-154141.pkl
Epoch 7 mean train loss:	1.2236745357513428
Epoch 7 m

In [12]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [13]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-22 19:44:05,904 - 20190822-154141 - predicting


In [14]:
actuals = spatial_test_dataset.grid_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.grid_dataset.simulated_streamflow[spatial_test_dataset.grid_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.164510592426435 	MSE sim: 2504182.763353862
474 	NSE sim: -0.028978027387839944 	MSE sim: 1204173.0706403728
2 	NSE sim: 0.37772133765341565 	MSE sim: 52.81744912297085
04215000 688 	NSE: -0.09800678277901631 	MSE: 105.46663998115619 (clipped to 0)
688 	NSE sim: -0.018479343354723232 	MSE sim: 8.199115492397437
5 	NSE sim: -5.442637507356822 	MSE sim: 156.26265364346466
3 	NSE sim: 0.4713107529404016 	MSE sim: 16.823926928582583
4 	NSE sim: -31.670472014409846 	MSE sim: 556.6890076045623
12 	NSE sim: -0.8393013557273867 	MSE sim: 5.412687343491143
8 	NSE sim: -3.922200364117553 	MSE sim: 37.47021227707791
9 	NSE sim: 0.2238607095964228 	MSE sim: 3.1504439860153806
556 	NSE sim: 0.038530852438640184 	MSE sim: 223.13488986462758
04214500 705 	NSE: -0.22011012175683775 	MSE: 147.06851996291005 (clipped to 0)
705 	NSE sim: -0.552265523826075 	MSE sim: 37.16179301212405
11 	NSE sim: -0.2506847626104507 	MSE sim: 15.262283912565843
04215500 706 	NSE: -0.013374685428373612 	MSE

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


503 	NSE sim: 0.2709040250258581 	MSE sim: 0.019676137380794035
183 	NSE sim: 0.3528595373614566 	MSE sim: 152.5615441426579
653 	NSE sim: 0.3535409385110817 	MSE sim: 142.4221072253298
651 	NSE sim: 0.2968195773114445 	MSE sim: 25.01375204069221
650 	NSE sim: 0.2796885579685219 	MSE sim: 0.9417432866745346
648 	NSE sim: 0.17409804811838347 	MSE sim: 0.5996300073696881
511 	NSE sim: 0.3205184022011901 	MSE sim: 0.4949352983023547
509 	NSE sim: 0.28899298127548734 	MSE sim: 40.91391204826967
671 	NSE sim: 0.2688046683136277 	MSE sim: 0.2270393377494031
244 	NSE sim: 0.3414280453180142 	MSE sim: 3.0675911857469917
253 	NSE sim: 0.29667313812302787 	MSE sim: 0.8797244300132296
512 	NSE sim: 0.3179437563043849 	MSE sim: 159.8719237832421
506 	NSE sim: 0.2850838413587937 	MSE sim: 25.13943903118942
502 	NSE sim: 0.26031401132706455 	MSE sim: 0.07711414864057164
273 	NSE sim: 0.2583101015497543 	MSE sim: 0.6392967226080636
216 	NSE sim: 0.3375310368178276 	MSE sim: 113.21421944476518
0417450

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: -inf 	MSE sim: 3314228.9960238207
526 	NSE sim: 0.26304269799006985 	MSE sim: 0.00028628832688287525
660 	NSE sim: 0.295236828300693 	MSE sim: 0.7429501925613656
531 	NSE sim: 0.28629713181401384 	MSE sim: 0.8530570672405585
532 	NSE sim: 0.20920708549602285 	MSE sim: 0.25992364275813884
587 	NSE sim: 0.19247022108637257 	MSE sim: 3.621109041034223
589 	NSE sim: 0.31793113488858316 	MSE sim: 0.03135482245237567
592 	NSE sim: 0.3106135552541677 	MSE sim: 0.3885221801119732
601 	NSE sim: 0.31875915138687794 	MSE sim: 0.21796936998376987
611 	NSE sim: 0.06679195850750463 	MSE sim: 0.1679781588233028
612 	NSE sim: 0.26581520502583245 	MSE sim: 0.27488848786887543
616 	NSE sim: 0.2475737473625823 	MSE sim: 1.5461196759914824
618 	NSE sim: 0.2627766387311571 	MSE sim: 0.6680637288173139
619 	NSE sim: 0.18209742091952485 	MSE sim: 0.9905761418954654
622 	NSE sim: 0.20804771218327367 	MSE sim: 0.4696175846505767
129 	NSE sim: 0.06052010064728286 	MSE sim: 4.613862401780843
662 	N

In [15]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.3293067491329917 / Min -inf / Max 0.5503053566874867
                  Median MSE (clipped to 0) 7.933262982988657 / Min 0.00028628832688287525 / Max 3314228.9960238207
Spatial test sim Median NSE (clipped to 0) -0.36448888710717664 / Min -1795.4512268995245 / Max 0.4713107529404016
                 Median MSE (clipped to 0) 52.442489145055745 / Min 0.2402238588483478 / Max 2504182.763353862
Stations temporal test Median NSE (clipped to 0) 0.16154662612700066 / Min -1.823372486379366 / Max 0.3685148924037631
                       Median MSE (clipped to 0) 136.31830426274234 / Min 20.70465915515228 / Max 50574.13250297914
Stations spatial test Median NSE (clipped to 0) -0.16789129092331612 / Min -7.4665962149604805 / Max 0.2675494190407729
                      Median MSE (clipped to 0) 182.98268826296388 / Min 50.17664675783574 / Max 14466.508211451666


In [16]:
nse_dict

{688: -0.09800678277901631,
 705: -0.22011012175683775,
 706: -0.013374685428373612,
 676: -0.11199048544093948,
 677: -0.747536407490655,
 684: -0.11567246008979448,
 685: -0.05885350418646684,
 686: -7.4665962149604805,
 687: -0.9263313742076198,
 704: -0.40636982624895324,
 703: -1.6468673978523998,
 710: 0.2675494190407729,
 702: -1.8490805575214413,
 701: 0.15598461818629417,
 718: 0.105014491184376,
 717: 0.15214896718494597,
 716: 0.12157907993848371,
 714: 0.20123711514725562,
 709: 0.20102247888091607,
 724: 0.1610332053584117,
 715: 0.11753670351292744,
 723: 0.18641191473872953,
 713: 0.28568365411418517,
 712: 0.1898288592043259,
 707: -0.2483894703249483,
 719: -0.10210864256561969,
 720: 0.06028176727494228,
 721: 0.12616001117419984,
 690: -0.3986214146315161,
 696: 0.23201030503574138,
 695: 0.20105263528360817,
 681: 0.20960860682458093,
 691: -0.2633828854282023,
 693: 0.22895510587510648,
 680: 0.3685148924037631,
 678: 0.27677180315756167,
 689: -1.823372486379366,


In [17]:
writer.close()

In [18]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.grid_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190822-154141.pkl'

In [19]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[602, 112, 132, 140, 718, 131, 123, 617, 124, 130, 144, 717, 608, 136, 135, 145, 137, 142, 141, 664, 538, 155, 138, 716, 614, 139, 533, 143, 603, 151, 160, 188, 205, 189, 199, 146, 159, 606, 181, 180, 212, 714, 200, 147, 615, 166, 148, 620, 221, 186, 596, 179, 241, 178, 242, 226, 225, 218, 668, 517, 245, 237, 274, 191, 238, 518, 709, 522, 516, 152, 230, 514, 515, 231, 173, 217, 655, 187, 210, 527, 659, 206, 724, 208, 175, 162, 219, 621, 154, 161, 201, 153, 551, 185, 222, 263, 262, 202, 715, 295, 227, 209, 286, 246, 174, 723, 184, 236, 550, 247, 156, 169, 158, 182, 593, 588, 163, 599, 164, 165, 609, 424, 399, 207, 423, 310, 435, 296, 267, 395, 344, 352, 420, 461, 170, 343, 365, 366, 409, 257, 430, 255, 299, 455, 322, 229, 439, 356, 429, 377, 239, 348, 468, 393, 228, 436, 529, 665, 331, 607, 528, 421, 405, 443, 235, 321, 661, 371, 523, 466, 414, 342, 380, 301, 297, 426, 306, 256, 340, 318, 402, 375, 411, 198, 319, 428, 328, 459, 353, 467, 460, 359, 362, 427, 300, 381, 232, 416, 549, 417,

In [20]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[688, 705, 706, 676, 677, 684, 685, 686, 687, 704, 703, 710, 702, 701]

In [21]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190822-194554'