ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets, utils, stgcn
import random
import pickle
import json
import networkx as nx

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190824-125119'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-24 12:51:19,928 - 20190824-125119 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
partitioning_strategy = 'distance'  # 'distance' or 'unilabel', see https://arxiv.org/abs/1801.07455
max_hops = 3 if partitioning_strategy == 'distance' else None
rdrs_vars = list(range(8))
agg = ['minmax','minmax','minmax','minmax','sum','minmax','minmax','minmax']
include_month = True
dem, landcover, soil, groundwater = True, True, True, True
landcover_types = None
seq_len = 8
seq_steps = 1

train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'
spatial_val = False  # Whether the val set does spatial or temporal validation
val_fraction = 0.1 if not spatial_val else None

In [5]:
with open('../data/simulations_shervan/subbasins.geojson', 'r') as f:
     subbasin_shapes = json.loads(f.read())

subbasin_graph = utils.create_subbasin_graph()
component_graph = subbasin_graph.copy()
component_graph.remove_nodes_from(['sub-1', 'sub1', 'sub474'])  # remove Lake Erie and sink to get connected components
connected_components = list(nx.connected_components(nx.Graph(component_graph)))

# Split into train/test/val regions
test_subbasins = [1, 474]
train_subbasins = []
val_subbasins = []
for component in connected_components:
    max_x = -999
    for node in component:
        subbasin = list(s['properties'] for s in subbasin_shapes['features'] if 'sub' + str(s['properties']['SubId']) == node)[0]
        max_x = max(max_x, subbasin['INSIDE_X'])
    if max_x < -81.9:
        train_subbasins += list(int(c[3:]) for c in component)
    elif -80.6 > max_x and max_x >= -81.9:
        val_subbasins += list(int(c[3:]) for c in component)
    else:
        test_subbasins += list(int(c[3:]) for c in component)
        
if not spatial_val:  # if no spatial validation, use same graph but different samples
    train_subbasins += val_subbasins
    val_subbasins = train_subbasins
    
train_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in train_subbasins))
val_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in val_subbasins))
test_subgraph = subbasin_graph.subgraph(list('sub' + str(t) for t in test_subbasins))
if partitioning_strategy == 'unilabel':
    train_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(train_subgraph)), 0).float().to(device)
    val_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(val_subgraph)), 0).float().to(device)
    test_adjacency = torch.unsqueeze(torch.from_numpy(nx.to_numpy_array(test_subraph)), 0).float().to(device)
elif partitioning_strategy == 'distance':  # use distances in upstream-graph, i.e. in reversed downstream-graph
    train_adjacency = utils.create_hop_matrix(train_subgraph.reverse(), max_hops).float().to(device)
    val_adjacency = utils.create_hop_matrix(val_subgraph.reverse(), max_hops).float().to(device)
    test_adjacency = utils.create_hop_matrix(test_subgraph.reverse(), max_hops).float().to(device)
else:
    raise Exception('Unsupported partitioning strategy')

subbasins = list(set(train_subbasins + test_subbasins + val_subbasins))

In [6]:
train_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                   dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
if spatial_val:
    val_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, val_subbasins, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=include_month, 
                                                     conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
else:
    val_dataset = train_dataset

# Two test datasets: one with spatial and temporal validation (i.e., different graph, different time), and one with only temporal validation (i.e. different time period only)
spatial_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, test_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                          conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)
temporal_test_dataset = datasets.SubbasinAggregatedDataset(rdrs_vars, train_subbasins, seq_len, seq_steps, test_start, test_end, aggregate_daily=agg, include_months=include_month, 
                                                           conv_scalers=train_dataset.scalers, dem=dem, landcover=landcover, soil=soil, groundwater=groundwater, landcover_types=landcover_types)

station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

  
cannot be safely cast to variable data type
  soil[i*8 + ((j-1)*4 + layer-1)] = soil_nc['Band{}'.format(layer)][:][::-1,:]\
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 586 out of 586 | elapsed: 13.9min finished


Saved object to /home/mgauch/runoff-nn/src/../data/train_test/SubbasinAggregatedDataset_0-1-2-3-4-5-6-7_8-1_2010-01-09000000-2012-12-31_dem__minmax-minmax-minmax-minmax-sum-minmax-minmax-minmax_month_2769910dbff44db1894a9bb5f99e35fb.pkl


  # This is added back by InteractiveShellApp.init_path()
cannot be safely cast to variable data type
  soil[i*8 + ((j-1)*4 + layer-1)] = soil_nc['Band{}'.format(layer)][:][::-1,:]\
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 138 out of 138 | elapsed:  5.0min finished


Saved object to /home/mgauch/runoff-nn/src/../data/train_test/SubbasinAggregatedDataset_0-1-2-3-4-5-6-7_8-1_2013-01-01-2014-12-31_dem__minmax-minmax-minmax-minmax-sum-minmax-minmax-minmax_month_8269b166ac1dc85385d27f156f560e51.pkl


  del sys.path[0]
cannot be safely cast to variable data type
  soil[i*8 + ((j-1)*4 + layer-1)] = soil_nc['Band{}'.format(layer)][:][::-1,:]\
[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=-1)]: Done 586 out of 586 | elapsed:  6.2min finished


Saved object to /home/mgauch/runoff-nn/src/../data/train_test/SubbasinAggregatedDataset_0-1-2-3-4-5-6-7_8-1_2013-01-01-2014-12-31_dem__minmax-minmax-minmax-minmax-sum-minmax-minmax-minmax_month_2769910dbff44db1894a9bb5f99e35fb.pkl


In [7]:
# Train model
num_epochs = 500
learning_rate = 2e-3
patience = 300
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)
dropout = 0.4
weight_decay = 1e-4

batch_size = 16
temp_kernel_size = 3
model = stgcn.Model(train_dataset.x.shape[2], train_adjacency.shape[0], temp_kernel_size, dropout=dropout).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='STGCN_simulationTraining')
param_description = {'time_stamp': time_stamp, 'batch_size': batch_size, 'loss': loss_fn, 'include_month': include_month, 'aggregate_daily': agg, 'rdrs_vars': rdrs_vars, 'dropout': dropout, 'spatial_validation': spatial_val, 'val_fraction': val_fraction, 'temp_kernel': temp_kernel_size,
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'x_train_shape': train_dataset.x.shape, 'x_val_shape': val_dataset.x.shape, 'partitioning_strategy': partitioning_strategy, 'max_hops': max_hops,
                     'spatial_x_test_shape': spatial_test_dataset.x.shape, 'temporal_x_test_shape': temporal_test_dataset.x.shape, 'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 
                     'landcover_types': landcover_types, 'test_start': test_start, 'test_end': test_end, 'model': str(model).replace('\n','').replace(' ', ''),}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190824-125119', 'batch_size': 16, 'loss': NSELoss(), 'include_month': True, 'aggregate_daily': ['minmax', 'minmax', 'minmax', 'minmax', 'sum', 'minmax', 'minmax', 'minmax'], 'rdrs_vars': [0, 1, 2, 3, 4, 5, 6, 7], 'dropout': 0.4, 'spatial_validation': False, 'val_fraction': 0.1, 'temp_kernel': 3, 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 0.0001\n), 'lr': 0.002, 'patience': 300, 'min_improvement': 0.01, 'x_train_shape': torch.Size([1088, 8, 178, 586]), 'x_val_shape': torch.Size([1088, 8, 178, 586]), 'partitioning_strategy': 'distance', 'max_hops': 3, 'spatial_x_test_shape': torch.Size([730, 8, 178, 138]), 'temporal_x_test_shape': torch.Size([730, 8, 178, 586]), 'num_epochs': 500, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 0.0001, 'landcover_types': None, 'test_start': '2013-01-01', 'test_end': '201

In [8]:
if spatial_val:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
else:
    val_indices = np.random.choice(len(train_dataset), size=int(val_fraction * len(train_dataset)), replace=False)
    train_indices = list(i for i in range(len(train_dataset)) if i not in val_indices)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=train_sampler, pin_memory=True, drop_last=False)
    val_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, sampler=val_sampler, pin_memory=True, drop_last=False)
    
spatial_test_dataloader = torch.utils.data.DataLoader(spatial_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
temporal_test_dataloader = torch.utils.data.DataLoader(temporal_test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)

In [9]:
# Subbasins with constant streamflow will divide by zero in loss calculation. Calculate loss without them.
train_non_constant_subbasin_mask = ((train_dataset.y_sim.min(dim=0)[0] - train_dataset.y_sim.max(dim=0)[0]) != 0).to(device)
val_non_constant_subbasin_mask = ((val_dataset.y_sim.min(dim=0)[0] - val_dataset.y_sim.max(dim=0)[0]) != 0).to(device)

y_train_means = train_dataset.y_sim_means[train_non_constant_subbasin_mask].to(device)
y_val_means = val_dataset.y_sim_means[val_non_constant_subbasin_mask].to(device)

In [10]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    for i, train_batch in enumerate(train_dataloader):
        y_pred = model(train_batch['x'].permute(0,2,1,3).to(device), train_adjacency)
        train_loss = loss_fn(y_pred[:,train_non_constant_subbasin_mask], train_batch['y_sim'][:,train_non_constant_subbasin_mask].to(device), means=y_train_means)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    
    model.eval()
    val_losses = torch.tensor(0.0)
    for i, val_batch in enumerate(val_dataloader):
        y_pred = model(val_batch['x'].permute(0,2,1,3).to(device), val_adjacency).detach()
        val_losses += loss_fn(y_pred[:,val_non_constant_subbasin_mask], val_batch['y_sim'][:,val_non_constant_subbasin_mask].to(device), means=y_val_means).detach()
            
    val_loss = (val_losses / len(val_dataloader)).item()
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('STGCN_simulationTraining', model, 'allStations', time_stamp, model_type='torch.dill')
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('STGCN_simulationTraining', best_loss_model[0], model, optimizer, time_stamp, use_dill=True)

Epoch 0 mean train loss:	2.201192617416382
Epoch 0 mean val loss:	1.5248926877975464
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-125119.pkl
Epoch 1 mean train loss:	1.450818657875061
Epoch 1 mean val loss:	1.4875565767288208
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-125119.pkl
Epoch 2 mean train loss:	1.4371440410614014
Epoch 2 mean val loss:	1.4965499639511108
Epoch 3 mean train loss:	1.3204010725021362
Epoch 3 mean val loss:	1.3383451700210571
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-125119.pkl
Epoch 4 mean train loss:	1.248177409172058
Epoch 4 mean val loss:	1.2370761632919312
Saved model as /home/mgauch/runoff-nn/src/../pickle/models/STGCN_simulationTraining_allStations_20190824-125119.pkl
Epoch 5 mean train loss:	1.1475125551223755
Epoch 5 mean val loss:	1.17119562625885
Saved model as /home/mgau

In [11]:
del y_train_means, y_val_means, y_pred, train_non_constant_subbasin_mask, val_non_constant_subbasin_mask
if USE_CUDA:
    torch.cuda.empty_cache()

In [12]:
logger.warning('predicting')
model.eval()

spatial_test_predictions = []  # test on different graph, different time
for i, test_batch in enumerate(spatial_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), test_adjacency).detach().cpu()
    spatial_test_predictions.append(pred)
    
temporal_test_predictions = []  # test on train graph but different time
for i, test_batch in enumerate(temporal_test_dataloader):
    pred = model(test_batch['x'].permute(0,2,1,3).to(device), train_adjacency).detach().cpu()
    temporal_test_predictions.append(pred)
    
predictions = torch.cat([torch.cat(spatial_test_predictions), torch.cat(temporal_test_predictions)], dim=1)

2019-08-24 14:38:44,795 - 20190824-125119 - predicting


In [13]:
actuals = spatial_test_dataset.data_streamflow.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
for i in range(len(test_subbasins + train_subbasins)):
    subbasin = (test_subbasins + train_subbasins)[i]
    station = None
    subbasin_sim = spatial_test_dataset.simulated_streamflow[spatial_test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
            
    pred = pd.DataFrame({'runoff': predictions[:,i]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    subbasin_type = 'test' if subbasin in test_subbasins else 'train'
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer, group=subbasin_type)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1243916160909917 	MSE sim: 2457767.999030627
474 	NSE sim: -2.007456963659444 	MSE sim: 3519510.223111459
9 	NSE sim: -15.17487063368236 	MSE sim: 65.65577151230704
556 	NSE sim: -0.04462994620478433 	MSE sim: 242.43459978607441
4 	NSE sim: 0.2586301644151682 	MSE sim: 12.632582653156772
5 	NSE sim: -1.5009232868730549 	MSE sim: 60.65852826878206
04215500 706 	NSE: 0.1956434249718213 	MSE: 145.25466095351004 (clipped to 0)
706 	NSE sim: 0.48153016997942133 	MSE sim: 15.600927217706078
11 	NSE sim: -0.1891945780652593 	MSE sim: 14.51191045122618
8 	NSE sim: 0.443804560157932 	MSE sim: 4.234033492490977
12 	NSE sim: 0.37534499187047965 	MSE sim: 1.8382318079757554
04215000 688 	NSE: 0.2690594989088275 	MSE: 70.20889113372955 (clipped to 0)
688 	NSE sim: -0.7445784911024498 	MSE sim: 14.04446798791828
10 	NSE sim: -0.396955842631572 	MSE sim: 7.9624181518118196
04214500 705 	NSE: 0.24653952435709814 	MSE: 90.81993094508218 (clipped to 0)
705 	NSE sim: 0.3638125816085628 	MS

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


502 	NSE sim: 0.4519349540160389 	MSE sim: 0.05713717721831345
506 	NSE sim: 0.4531508349585869 	MSE sim: 19.229501358515062
508 	NSE sim: 0.5109490168716606 	MSE sim: 1.2744825152464017
509 	NSE sim: 0.3799918628753244 	MSE sim: 35.67750770876596
505 	NSE sim: 0.4250266453522564 	MSE sim: 1.6350324797248081
253 	NSE sim: 0.46174614534488767 	MSE sim: 0.6732503636008161
183 	NSE sim: 0.4366810044901114 	MSE sim: 132.80086899446638
215 	NSE sim: 0.4708720086428789 	MSE sim: 0.4243805565140602
244 	NSE sim: 0.5576224909706555 	MSE sim: 2.060569597328841
512 	NSE sim: 0.45908460825325104 	MSE sim: 126.78893431714032
649 	NSE sim: 0.36637459364794733 	MSE sim: 16.147202014395468
650 	NSE sim: 0.41270212224263325 	MSE sim: 0.7678398556273821
507 	NSE sim: 0.39142503499123327 	MSE sim: 0.008476661394043149
280 	NSE sim: 0.47278365024971725 	MSE sim: 0.8589480610839664
243 	NSE sim: 0.43446200199551555 	MSE sim: 49.70928204528615
279 	NSE sim: 0.4017455276040982 	MSE sim: 4.249581500830914
65

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


526 	NSE sim: 0.2650382652013733 	MSE sim: 0.0002855131020543156
660 	NSE sim: 0.3551387102621798 	MSE sim: 0.6798025757090722
531 	NSE sim: 0.21897401311467368 	MSE sim: 0.933525375208996
532 	NSE sim: 0.3389921628988377 	MSE sim: 0.21726492708749282
587 	NSE sim: 0.3321904722005946 	MSE sim: 2.994578258224049
589 	NSE sim: 0.30209716994159763 	MSE sim: 0.03208271253067263
592 	NSE sim: 0.3495193174293273 	MSE sim: 0.3665957966525712
601 	NSE sim: 0.4525561851775164 	MSE sim: 0.17515975981371865
611 	NSE sim: 0.3031108276865666 	MSE sim: 0.12544058223275323
612 	NSE sim: 0.4288967082957601 	MSE sim: 0.21382861828273014
616 	NSE sim: 0.32943228911184286 	MSE sim: 1.3779130223787588
618 	NSE sim: 0.47149403401961953 	MSE sim: 0.4789263130882982
619 	NSE sim: 0.3875784590465151 	MSE sim: 0.7417144569141765
622 	NSE sim: 0.2974045227229135 	MSE sim: 0.4166301380792801
122 	NSE sim: 0.121161670119732 	MSE sim: 167.80973093986458
129 	NSE sim: 0.150711817960549 	MSE sim: 4.170923522779354
6

In [14]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.nanmedian(nses), '/ Min', np.nanmin(nses), '/ Max', np.nanmax(nses))
    print(' ' * len(name), 'Median MSE (clipped to 0)', np.nanmedian(mses), '/ Min', np.nanmin(mses), '/ Max', np.nanmax(mses))
    
    return np.nanmedian(nses)

nse_median_sim_temporal = print_nse_mse('Temporal test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_sim_spatial = print_nse_mse('Spatial test sim', nse_sim_dict, mse_sim_dict, test_subbasins)
nse_median_stations_temporal = print_nse_mse('Stations temporal test', nse_dict, mse_dict, list(s for s in station_subbasins if s in train_subbasins))
nse_median_stations_spatial = print_nse_mse('Stations spatial test', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))

writer.add_scalar('nse_median_sim_temporal', nse_median_sim_temporal)
writer.add_scalar('nse_median_sim', nse_median_sim_spatial)
writer.add_scalar('nse_median_stations_temporal', nse_median_stations_temporal)
writer.add_scalar('nse_median_stations_spatial', nse_median_stations_spatial)

Temporal test sim Median NSE (clipped to 0) 0.40321057388733944 / Min -0.5809739142557684 / Max 0.6685976708918703
                  Median MSE (clipped to 0) 7.041553807791319 / Min 0.0002855131020543156 / Max 678248.8555420364
Spatial test sim Median NSE (clipped to 0) -0.19288392319792247 / Min -1995.3140398460628 / Max 0.5672164537558082
                 Median MSE (clipped to 0) 45.176117231270055 / Min 0.611161134639079 / Max 3519510.223111459
Stations temporal test Median NSE (clipped to 0) 0.1894081532990471 / Min -2.4677183640960507 / Max 0.4138737778005863
                       Median MSE (clipped to 0) 133.80256902400836 / Min 18.179814334197516 / Max 49508.58707281006
Stations spatial test Median NSE (clipped to 0) -0.08290635818365999 / Min -1.6495738258153971 / Max 0.2690594989088275
                      Median MSE (clipped to 0) 143.94427741464523 / Min 27.283028273426197 / Max 8435.02803675892


In [15]:
nse_dict

{706: 0.1956434249718213,
 688: 0.2690594989088275,
 705: 0.24653952435709814,
 676: 0.13312731541314404,
 687: -0.12319150634202103,
 686: 0.12755770024448798,
 685: -0.45097700400413077,
 677: -0.1596621435443697,
 684: -0.21344461958632044,
 704: -0.35288723305527414,
 703: -1.6495738258153971,
 710: 0.029011054588729657,
 702: -0.3749301922702757,
 701: -0.04262121002529895,
 718: 0.2243477355209088,
 717: 0.1357049433895302,
 716: 0.11242552837428166,
 714: 0.181127230922692,
 709: 0.3306937381641295,
 724: -0.16229531374673734,
 723: 0.16986608803687786,
 715: 0.20475871336449514,
 713: 0.30073357154003266,
 712: 0.20867164535706118,
 707: -0.2935059753998839,
 720: -0.036295030906830617,
 719: -0.04769541922865739,
 721: 0.1658219580385193,
 690: -0.2775472847183993,
 689: -2.4677183640960507,
 695: 0.19768907567540217,
 683: 0.21871744507355562,
 682: 0.17884998404936414,
 694: 0.15081976528786512,
 693: 0.22664905504527677,
 678: 0.26811276815343477,
 696: 0.2467694074851461,


In [16]:
writer.close()

In [17]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   spatial_test_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin']]
load_data.pickle_results('STGCN_simulationTraining', save_df, time_stamp)

'STGCN_simulationTraining_20190824-125119.pkl'

In [18]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[112, 602, 124, 617, 140, 132, 131, 123, 718, 130, 608, 145, 144, 135, 136, 717, 142, 538, 137, 141, 155, 716, 664, 138, 614, 139, 143, 533, 151, 603, 181, 200, 606, 199, 146, 188, 159, 180, 714, 189, 160, 205, 212, 147, 166, 615, 148, 620, 178, 226, 238, 152, 225, 517, 522, 186, 274, 515, 709, 218, 518, 173, 242, 659, 668, 245, 596, 210, 516, 527, 241, 179, 217, 655, 231, 514, 187, 237, 230, 191, 221, 286, 621, 185, 295, 201, 262, 208, 724, 236, 551, 184, 154, 153, 219, 162, 175, 222, 202, 247, 246, 161, 174, 227, 209, 723, 715, 263, 550, 206, 169, 156, 182, 158, 593, 588, 163, 599, 164, 609, 165, 240, 402, 529, 534, 373, 459, 546, 450, 322, 195, 523, 419, 361, 329, 266, 348, 665, 340, 320, 376, 395, 424, 357, 327, 437, 451, 224, 661, 380, 542, 232, 468, 297, 372, 404, 171, 311, 442, 362, 353, 313, 355, 426, 441, 235, 528, 296, 359, 328, 455, 452, 170, 435, 446, 366, 336, 436, 344, 411, 233, 321, 356, 549, 541, 300, 408, 421, 358, 405, 371, 403, 399, 461, 407, 674, 427, 349, 330, 467,

In [19]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[706, 688, 705, 676, 687, 686, 685, 677, 684, 704, 703, 710, 702, 701]

In [20]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190824-144030'