ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190822-201740'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-22 20:17:40,826 - 20190822-201740 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 10 if partitioning_strategy == 'distance' else None
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
dem, landcover, soil, groundwater = True, False, False, False
landcover_types = []
seq_len = 8
seq_steps = 1

train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.grid_dataset.conv_scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.grid_dataset.simulated_streamflow[~pd.isna(train_dataset.grid_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  


Loading subbasin shapes
Aggregating into subbasins


  # This is added back by InteractiveShellApp.init_path()


Loading subbasin shapes
Aggregating into subbasins


  del sys.path[0]


Loading subbasin shapes
Aggregating into subbasins


In [7]:
# Train model
num_epochs = 1000
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.2
weight_decay = 1e-5

batch_size = 32
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0]).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190822-201740', 'batch_size': 32, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['sum', 'minmax'], 'rdrs_vars': [4, 5], 'dropout': 0.2, 'spatial_validation': False, 'val_fraction': 0.1, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-05\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 48, 586]), 'x_val_shape': torch.Size([1088, 8, 48, 586]), 'partitioning_strategy': 'distance', 'max_hops': 10, 'spatial_x_test_shape': torch.Size([730, 8, 48, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 48, 586]), 'num_epochs': 1000, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-05, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'model': 'Model((st_gcn_networks):ModuleList((0):st_gcn((gcn):ConvTemporalGraphical((conv):C

In [8]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [9]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [10]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	30.58552360534668
Epoch 0 mean val loss:	2.502617835998535
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-201740.pkl
Epoch 1 mean train loss:	1.4617267847061157
Epoch 1 mean val loss:	1.5060327053070068
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-201740.pkl
Epoch 2 mean train loss:	1.4465203285217285
Epoch 2 mean val loss:	1.5333192348480225
Epoch 3 mean train loss:	1.4328211545944214
Epoch 3 mean val loss:	1.4178640842437744
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190822-201740.pkl
Epoch 4 mean train loss:	1.400990605354309
Epoch 4 mean val loss:	1.4282926321029663
Epoch 5 mean train loss:	1.4175292253494263
Epoch 5 mean val loss:	1.4560816287994385
Epoch 6 mean train loss:	1.4146994352340698
Epoch 6 mean val loss:	1.3918938636779785
Saved model as /home/mgauch/runoff-nn/src/../pickle/m

In [11]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [12]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-22 23:05:55,733 - 20190822-201740 - predicting


In [13]:
actuals = spatial_test_dataset.grid_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.grid_dataset.simulated_streamflow[spatial_test_dataset.grid_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1754903448713536 	MSE sim: 2516885.545643153
474 	NSE sim: -1.1464515170700311 	MSE sim: 2511908.9480000017
5 	NSE sim: 0.42808180073352986 	MSE sim: 13.871563530048029
04215000 688 	NSE: -0.08952557167186947 	MSE: 104.6519957981961 (clipped to 0)
688 	NSE sim: 0.0061065244882670955 	MSE sim: 8.001190643709279
556 	NSE sim: 0.1502935783710565 	MSE sim: 197.19733003220585
11 	NSE sim: -0.4239243434930464 	MSE sim: 17.376351139870525
3 	NSE sim: -0.8424567612283744 	MSE sim: 58.63058137152695
04214500 705 	NSE: -0.01692286367924134 	MSE: 122.57691974754083 (clipped to 0)
705 	NSE sim: -0.051498287475814086 	MSE sim: 25.173245886093245
04215500 706 	NSE: -0.025986034278810966 	MSE: 185.2775972484394 (clipped to 0)
706 	NSE sim: 0.05738597852358529 	MSE sim: 28.363565036868387
12 	NSE sim: -0.19524964273313383 	MSE sim: 3.5173750040408427
2 	NSE sim: -3.287122864788908 	MSE sim: 363.88021556297235
8 	NSE sim: -0.3367424444084093 	MSE sim: 10.175941539661975
9 	NSE sim: -2.14

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 0.05044674634713313
654 	NSE sim: 0.38967249613191046 	MSE sim: 121.33424152908397
503 	NSE sim: 0.3275664039642432 	MSE sim: 0.018146987871562662
253 	NSE sim: 0.38302440565380025 	MSE sim: 0.7717158727875044
653 	NSE sim: 0.462053394857193 	MSE sim: 118.51560855638864
507 	NSE sim: 0.30733367271931644 	MSE sim: 0.009647945204793704
280 	NSE sim: 0.3579635425390497 	MSE sim: 1.0460145451530658
583 	NSE sim: 0.4077539956100472 	MSE sim: 153.30040152400818
650 	NSE sim: 0.410240057569217 	MSE sim: 0.7710587866928239
216 	NSE sim: 0.44062671822067334 	MSE sim: 95.59543615553247
649 	NSE sim: 0.31764065035022615 	MSE sim: 17.389129531028498
254 	NSE sim: 0.44965912229053506 	MSE sim: 37.2866048201587
505 	NSE sim: 0.30444095418522177 	MSE sim: 1.9779379727436233
647 	NSE sim: 0.421026182995362 	MSE sim: 0.020236571653483568
183 	NSE sim: 0.40760819842710216 	MSE sim: 139.65470126365966
279 	NSE sim: 0.33685451283903 	MSE sim: 4.710521901011145
244 	NSE sim: 0.

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


526 	NSE sim: 0.3401489635453754 	MSE sim: 0.0002563345918458382
660 	NSE sim: 0.3093865335062068 	MSE sim: 0.7280337970553752
531 	NSE sim: 0.35133545133554267 	MSE sim: 0.7753196773792757
532 	NSE sim: 0.26061603959482693 	MSE sim: 0.24302616887506964
587 	NSE sim: 0.22114401131909156 	MSE sim: 3.49253059877281
589 	NSE sim: 0.3533281457673889 	MSE sim: 0.029727615804747108
592 	NSE sim: 0.1453554947989688 	MSE sim: 0.48165778267346854
601 	NSE sim: 0.3654518659164421 	MSE sim: 0.20302960001906395
611 	NSE sim: 0.24049727880164518 	MSE sim: 0.13671106876034553
612 	NSE sim: 0.34073605864133405 	MSE sim: 0.24683713036162108
616 	NSE sim: 0.32426675684555606 	MSE sim: 1.3885273929511308
618 	NSE sim: 0.2990871878741934 	MSE sim: 0.6351595072064429
619 	NSE sim: 0.3022120172485264 	MSE sim: 0.8451032500619666
622 	NSE sim: 0.29239576545925505 	MSE sim: 0.41960026711918097
537 	NSE sim: 0.34862593644969875 	MSE sim: 1.051952659426538
128 	NSE sim: 0.14432348541458928 	MSE sim: 2.39425237

In [14]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.nanmedian(nses), '/ Min', np.nanmin(nses), '/ Max', np.nanmax(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.nanmedian(mses), '/ Min', np.nanmin(mses), '/ Max', np.nanmax(mses))
    
    return np.nanmedian(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.3714717825242064 / Min -inf / Max 0.6118252433754733
                  Median MSE (clipped to 0) 7.484726715260062 / Min 0.0002563345918458382 / Max 90311.89273866997
Spatial test sim Median NSE (clipped to 0) -0.4518774431214886 / Min -5732.5733941631925 / Max 0.5297954381502015
                 Median MSE (clipped to 0) 35.313948132665274 / Min 0.37815545998088596 / Max 2516885.545643153
Stations temporal test Median NSE (clipped to 0) 0.1784495140726437 / Min -1.9960579939710024 / Max 0.4508638758921346
                       Median MSE (clipped to 0) 139.97437280203502 / Min 19.519555990544564 / Max 48892.3668830987
Stations spatial test Median NSE (clipped to 0) -0.19919423614702525 / Min -1.3253505530093599 / Max 0.041980594425713225
                      Median MSE (clipped to 0) 183.78094222640811 / Min 32.31930385853787 / Max 14159.081518292885


In [15]:
nse_dict

{688: -0.08952557167186947,
 705: -0.01692286367924134,
 706: -0.025986034278810966,
 687: -0.885395049031938,
 684: -0.24744420484380214,
 677: -1.3253505530093599,
 686: -0.033489666259068684,
 676: -0.15094426745024836,
 685: -0.27965849885092364,
 704: -0.34520095325278954,
 703: -0.564599370308914,
 710: -0.4106119759002651,
 702: -0.05676603680195624,
 701: 0.041980594425713225,
 718: 0.15563467907696493,
 717: 0.1732148492362654,
 716: 0.12665508779184964,
 714: 0.213944493224817,
 709: -0.3385444822113941,
 724: 0.1939889638784007,
 715: 0.16426141744283695,
 723: 0.21718932866446317,
 713: 0.3094371544269908,
 712: 0.21313282993277427,
 707: -0.31310215756990334,
 720: 0.09371231086378706,
 719: -0.16387145429984806,
 721: 0.15399898659813693,
 696: 0.24234221885976837,
 693: 0.16199444624885173,
 694: 0.08924858384875134,
 683: 0.20643568144744318,
 692: -0.07025235783293482,
 680: 0.4508638758921346,
 681: 0.3253045854775122,
 695: 0.3172614263636706,
 697: 0.244596733064391

In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.grid_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190822-201740.pkl'

In [18]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[112, 602, 140, 132, 123, 718, 130, 131, 617, 124, 144, 717, 136, 145, 135, 608, 141, 137, 538, 142, 155, 138, 716, 664, 614, 139, 603, 533, 151, 143, 714, 205, 180, 181, 188, 606, 159, 199, 160, 200, 212, 189, 146, 615, 147, 166, 148, 620, 238, 245, 515, 242, 659, 173, 709, 218, 274, 231, 241, 516, 178, 187, 225, 518, 517, 514, 655, 179, 186, 522, 237, 152, 596, 210, 527, 668, 230, 221, 226, 217, 191, 174, 550, 222, 161, 551, 185, 202, 263, 286, 201, 247, 206, 295, 162, 227, 209, 153, 724, 175, 715, 236, 219, 208, 621, 184, 246, 723, 154, 262, 169, 156, 593, 158, 182, 163, 588, 599, 164, 609, 165, 340, 417, 336, 437, 300, 308, 435, 402, 542, 666, 198, 331, 267, 523, 297, 661, 420, 430, 311, 353, 528, 229, 266, 377, 228, 352, 372, 450, 347, 214, 422, 394, 376, 339, 443, 371, 529, 213, 548, 355, 395, 321, 380, 426, 207, 306, 421, 403, 534, 393, 404, 546, 170, 467, 256, 381, 442, 375, 364, 452, 424, 298, 441, 374, 387, 240, 233, 436, 361, 363, 408, 328, 296, 382, 414, 440, 322, 429, 320,

In [19]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[688, 705, 706, 687, 684, 677, 686, 676, 685, 704, 703, 710, 702, 701]

In [20]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190822-230745'