ConvLSTM trained on simulated streamflow.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
from datetime import datetime, timedelta
from sklearn import preprocessing
import netCDF4 as nc
import torch
from torch import nn, utils
from torch.utils.tensorboard import SummaryWriter
from src import load_data, evaluate, conv_lstm, datasets
import torch.autograd as autograd
from torchvision import transforms
import torchvision.transforms.functional as TF
import random
import pickle

time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190814-152847'

In [2]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='../log.out', mode='a')
chandler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - {} - %(message)s'.format(time_stamp))
fhandler.setFormatter(formatter)
chandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.addHandler(chandler)
logger.setLevel(logging.INFO)

In [3]:
USE_CUDA = False
if torch.cuda.is_available():
    print('CUDA Available')
    USE_CUDA = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
num_devices = torch.cuda.device_count() if USE_CUDA else 0
logger.warning('cuda devices: {}'.format(list(torch.cuda.get_device_name(i) for i in range(num_devices))))
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

CUDA Available
2019-08-14 15:28:48,028 - 20190814-152847 - cuda devices: ['Tesla V100-SXM2-16GB']


In [4]:
landcover_nc = nc.Dataset('../data/NA_NALCMS_LC_30m_LAEA_mmu12_urb05_n40-45w75-90_erie.nc', 'r')
landcover_nc.set_auto_mask(False)
erie_lats = landcover_nc['lat'][:][::-1]
erie_lons = landcover_nc['lon'][:]
landcover_nc.close()
erie_lat_min, erie_lat_max, erie_lon_min, erie_lon_max = erie_lats.min(), erie_lats.max(), erie_lons.min(), erie_lons.max()
del erie_lats, erie_lons

out_lats, out_lons = load_data.load_dem_lats_lons()
out_lats = out_lats[(erie_lat_min <= out_lats) & (out_lats <= erie_lat_max)].copy()
out_lons = out_lons[(erie_lon_min <= out_lons) &  (out_lons <= erie_lon_max)].copy()

In [5]:
seq_len = 8
seq_steps = 1
stateful_lstm = False
validation_fraction, val_start, val_end = None, None, None
random_transform = True

if stateful_lstm:
    val_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    val_end = '2010-09-30'
    train_start = '2010-10-01'
    train_end = '2012-12-31'
else:
    validation_fraction = 0.1
    train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=seq_len * seq_steps)  # first day for which to make a prediction in train set
    train_end = '2012-12-31'
test_start = '2013-01-01'
test_end = '2014-12-31'

In [6]:
rdrs_vars = [4, 5]
agg = ['sum', 'minmax']
include_month = True
train_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, train_start, train_end, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
if stateful_lstm:
    val_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, val_start, val_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg,  include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)
test_dataset = datasets.RdrsGridDataset(rdrs_vars, seq_len, seq_steps, test_start, test_end, conv_scalers=train_dataset.conv_scalers, aggregate_daily=agg, include_months=True, include_simulated_streamflow=True, resample_rdrs=True, out_lats=out_lats, out_lons=out_lons)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
landcover_types = []
geophysical_dataset = datasets.GeophysicalGridDataset(dem=True, landcover=False, soil=False, groundwater=False, min_lat=erie_lat_min, max_lat=erie_lat_max, min_lon=erie_lon_min, max_lon=erie_lon_max, landcover_types=landcover_types)
geophysical_data = next(geophysical_dataset.__iter__())

In [8]:
subbasins = train_dataset.simulated_streamflow['subbasin'].unique()
np.random.seed(0)
test_subbasins = np.random.choice(subbasins, size=int(0.2 * len(subbasins)), replace=False)
val_subbasins = np.random.choice(list(s for s in subbasins if s not in test_subbasins), size=int(validation_fraction * len(subbasins)), replace=False)
train_subbasins = list(s for s in subbasins if s not in test_subbasins and s not in val_subbasins)
station_subbasins = train_dataset.simulated_streamflow[~pd.isna(train_dataset.simulated_streamflow['StationID'])]['subbasin'].unique()

train_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in train_subbasins)
val_subbasin_indices = list(train_dataset.outlet_to_row_col[s] for s in val_subbasins)
test_subbasin_indices = list(test_dataset.outlet_to_row_col[s] for s in test_subbasins)

train_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
val_mask = torch.zeros((train_dataset.out_lats.shape[0], train_dataset.out_lats.shape[1]), dtype=torch.bool)
for row in range(train_mask.shape[0]):
    for col in range(train_mask.shape[1]):
        train_mask[row, col] = True if (row, col) in train_subbasin_indices else False
        val_mask[row, col] = True if (row, col) in val_subbasin_indices else False
train_mask = train_mask
val_mask = val_mask

In [9]:
# Train model
num_epochs = 300
learning_rate = 2e-3
patience = 100
min_improvement = 0.01
best_loss_model = (-1, np.inf, None)

# Prepare model
batch_size = 8
num_convlstm_layers = 2
num_conv_layers = 6
convlstm_hidden_dims = [8,8]
conv_hidden_dims = [8,8,8,8,8]
convlstm_kernel_size = [(5,5)] * num_convlstm_layers
conv_kernel_size = [(5,5)] * num_conv_layers
conv_activation = nn.LeakyReLU
dropout = 0.1
weight_decay = 1e-6

model = conv_lstm.ConvLSTMGridWithGeophysicalInput((train_dataset.conv_height, train_dataset.conv_width), train_dataset.n_conv_vars, 
                                                   geophysical_dataset.shape[0], convlstm_hidden_dims, conv_hidden_dims, convlstm_kernel_size, 
                                                   conv_kernel_size, num_convlstm_layers, num_conv_layers, conv_activation, dropout=dropout, 
                                                   geophysical_size=geophysical_dataset.shape[1:], feed_timesteps=seq_len).to(device)
if num_devices > 1:
    model = torch.nn.DataParallel(model, device_ids=list(range(num_devices)))
loss_fn = evaluate.NSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

writer = SummaryWriter(comment='ConvLSTM_simulationTraining')
param_description = {'time_stamp': time_stamp, 'H_convlstm': convlstm_hidden_dims, 'H_conv': conv_hidden_dims, 'batch_size': batch_size, 'num_convlstm_layers': num_convlstm_layers, 'num_conv_layers': num_conv_layers, 'convlstm_kernel_size': convlstm_kernel_size, 'conv_kernel_size': conv_kernel_size, 'loss': loss_fn, 
                     'optimizer': optimizer, 'lr': learning_rate, 'patience': patience, 'min_improvement': min_improvement, 'stateful_lstm': stateful_lstm, 'dropout': dropout, 'geophys_shape': geophysical_dataset.shape, 'conv_activation': conv_activation,
                     'num_epochs': num_epochs, 'seq_len': seq_len, 'seq_steps': seq_steps, 'train_start': train_start, 'train_end': train_end, 'weight_decay': weight_decay, 'validation_fraction': validation_fraction, 'landcover_types': landcover_types,
                     'test_start': test_start, 'test_end': test_end, 'n_conv_vars': train_dataset.n_conv_vars, 'model': str(model).replace('\n','').replace(' ', ''), 'val_start': val_start, 'val_end': val_end,
                     'train len': len(train_dataset), 'conv_height': train_dataset.conv_height, 'conv_width': train_dataset.conv_width, 'test len': len(test_dataset), 'random_transform': random_transform}
writer.add_text('Parameter Description', str(param_description))
str(param_description)

"{'time_stamp': '20190814-152847', 'H_convlstm': [8, 8], 'H_conv': [8, 8, 8, 8, 8], 'batch_size': 8, 'num_convlstm_layers': 2, 'num_conv_layers': 6, 'convlstm_kernel_size': [(5, 5), (5, 5)], 'conv_kernel_size': [(5, 5), (5, 5), (5, 5), (5, 5), (5, 5), (5, 5)], 'loss': NSELoss(), 'optimizer': Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    eps: 1e-08\n    lr: 0.002\n    weight_decay: 1e-06\n), 'lr': 0.002, 'patience': 100, 'min_improvement': 0.01, 'stateful_lstm': False, 'dropout': 0.1, 'geophys_shape': torch.Size([1, 460, 848]), 'conv_activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'num_epochs': 300, 'seq_len': 8, 'seq_steps': 1, 'train_start': datetime.datetime(2010, 1, 9, 0, 0), 'train_end': '2012-12-31', 'weight_decay': 1e-06, 'validation_fraction': 0.1, 'landcover_types': [], 'test_start': '2013-01-01', 'test_end': '2014-12-31', 'n_conv_vars': 15, 'model': 'ConvLSTMGridWithGeophysicalInput((conv_lstm):ConvLSTM((cell_list):ModuleList((0):Con

In [10]:
if stateful_lstm:
    train_sampler = datasets.StatefulBatchSampler(train_dataset, batch_size)
    val_sampler = datasets.StatefulBatchSampler(val_dataset, batch_size)
    test_sampler = datasets.StatefulBatchSampler(test_dataset, batch_size)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler, pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler, pin_memory=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler, pin_memory=True)
else:
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, pin_memory=True, drop_last=False)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False, pin_memory=True, drop_last=False)
    
geophysical_batch = geophysical_data.repeat(batch_size,1,1,1)

In [11]:
def random_transform(rdrs_batch, geophysical_batch, y_batch, train_mask, val_mask, rdrs_contains_month=False, border_masking=0):
    if random_transform and random.random() > 0.5:
        angle = random.randint(-180, 180)
        horizontal_flip = random.choice([True, False])
        vertical_flip = random.choice([True, False])
        transformed_tensors = []
        for tensor in [rdrs_batch, geophysical_batch, y_batch, train_mask.float(), val_mask.float()]:
            images = [TF.to_pil_image(image, mode='F') for image in tensor.reshape((-1,*tensor.shape[-2:]))]
            images = [TF.rotate(image, angle) for image in images]
            images = [TF.hflip(image) for image in images] if horizontal_flip else images
            images = [TF.vflip(image) for image in images] if vertical_flip else images
            
            transformed_tensors.append(torch.cat([TF.to_tensor(image) for image in images]).reshape(tensor.shape))
        
        rdrs_transformed, geophysical_transformed, y_transformed, train_mask_transformed, val_mask_transformed = transformed_tensors
        
        # Fix "month" features that are all-0/all-1 images
        if rdrs_contains_month:
            rdrs_transformed[:,:,-12:] = rdrs_batch[:,:,-12:]
            
        # Do not consider subbasins that are rotated almost out of the image for training
        border_mask = torch.zeros(train_mask.shape, dtype=torch.bool)
        border_mask[border_masking:-border_masking,border_masking:-border_masking] = True
        return rdrs_transformed, geophysical_transformed, y_transformed, train_mask_transformed.bool() & border_mask, val_mask_transformed.bool() & border_mask
    return rdrs_batch, geophysical_batch, y_batch, train_mask, val_mask

In [12]:
y_mean = train_dataset.simulated_streamflow[train_dataset.simulated_streamflow['subbasin'].isin(train_subbasins)]['simulated_streamflow'].mean()
y_mean_val = train_dataset.simulated_streamflow[train_dataset.simulated_streamflow['subbasin'].isin(val_subbasins)]['simulated_streamflow'].mean()

In [13]:
torch.manual_seed(0)
np.random.seed(0)
for epoch in range(num_epochs):
    model.train()

    train_losses = torch.tensor(0.0)
    val_losses = torch.tensor(0.0)
    conv_hidden_states = None
    for i, train_batch in enumerate(train_dataloader):
        geophysical_input = geophysical_batch[:train_batch['y_sim'].shape[0]]
        x_conv, geophysical_input, y_train, train_mask_transformed, val_mask_transformed = random_transform(train_batch['x_conv'], geophysical_input, train_batch['y_sim'], 
                                                                                                            train_mask, val_mask, rdrs_contains_month=include_month, border_masking=20)
        y_train = y_train.reshape((y_train.shape[0],-1)).to(device, non_blocking=True)
        train_mask_transformed = train_mask_transformed.reshape(-1).to(device, non_blocking=True)
        val_mask_transformed = val_mask_transformed.reshape(-1).to(device, non_blocking=True)
        
        if not train_mask_transformed.any():
            print('Batch {} has no target values. skipping.'.format(i))
            continue
        if not stateful_lstm:
            conv_hidden_states = None
        
        y_pred, conv_hidden_states = model(x_conv.to(device), geophysical_input.to(device), hidden_state=conv_hidden_states)
        y_pred = y_pred.reshape((y_train.shape[0], -1))
        train_loss = loss_fn(y_pred[:,train_mask_transformed], y_train[:,train_mask_transformed], mean=y_mean)
        val_losses += loss_fn(y_pred[:,val_mask_transformed], y_train[:,val_mask_transformed], mean=y_mean_val).detach()

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_losses += train_loss.detach()
        
    train_loss = (train_losses / len(train_dataloader)).item()
    val_loss = (val_losses / len(train_dataloader)).item()
    print('Epoch', epoch, 'mean train loss:\t{}'.format(train_loss))
    print('Epoch', epoch, 'mean val loss:\t{}'.format(val_loss))
    writer.add_scalar('loss_nse', train_loss, epoch)
    writer.add_scalar('loss_nse_val', val_loss, epoch)
    
    if val_loss < best_loss_model[1] - min_improvement:
        best_loss_model = (epoch, val_loss, model.state_dict())  # new best model
        load_data.pickle_model('ConvLSTM_simulationTraining', model, 'allStations', time_stamp)
    elif epoch > best_loss_model[0] + patience:
        print('Patience exhausted in epoch {}. Best val-loss was {}'.format(epoch, best_loss_model[1]))
        break
    
print('Using best model from epoch', str(best_loss_model[0]), 'which had loss', str(best_loss_model[1]))
model.load_state_dict(best_loss_model[2])
load_data.save_model_with_state('ConvLSTM_simulationTraining', best_loss_model[0], model, optimizer, time_stamp)

Epoch 0 mean train loss:	0.40203961730003357
Epoch 0 mean val loss:	0.5318483710289001
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-152847.pkl
Epoch 1 mean train loss:	0.36414092779159546
Epoch 1 mean val loss:	0.4745473861694336
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-152847.pkl
Epoch 2 mean train loss:	0.35666248202323914
Epoch 2 mean val loss:	0.46444135904312134
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-152847.pkl
Epoch 3 mean train loss:	0.35609444975852966
Epoch 3 mean val loss:	0.46356910467147827
Epoch 4 mean train loss:	0.3521784842014313
Epoch 4 mean val loss:	0.4594478905200958
Epoch 5 mean train loss:	0.3489142060279846
Epoch 5 mean val loss:	0.45343008637428284
Saved model as ../pickle/models/ConvLSTM_simulationTraining_allStations_20190814-152847.pkl
Epoch 6 mean train loss:	0.3474976718425751
Epoch 6 mean val loss:	0.4551641345024109
Epoch 7 mean train loss

In [14]:
del train_mask_transformed, val_mask_transformed, y_train, y_pred
if USE_CUDA:
    torch.cuda.empty_cache()

In [15]:
logger.warning('predicting')
model.eval()

predictions = []
conv_hidden_states = None
for i, test_batch in enumerate(test_dataloader):
    if not stateful_lstm:
        conv_hidden_states = None
        
    geophysical_input = geophysical_batch[:test_batch['y_sim'].shape[0]]
    pred, conv_hidden_states = model(test_batch['x_conv'].to(device), geophysical_input.to(device), hidden_state=conv_hidden_states)
    predictions.append(pred.detach().cpu())
    
predictions = torch.cat(predictions).cpu()

if stateful_lstm:
    # reorder time series
    pred_indices = np.array(list(test_sampler.__iter__())).reshape(-1)
    predictions = predictions[pred_indices.argsort()]

2019-08-14 18:49:57,273 - 20190814-152847 - predicting


In [16]:
actuals = test_dataset.data_runoff.copy()
if len(actuals['date'].unique()) != len(predictions):
    print('Warning: length of prediction {} and actuals {} does not match.'.format(len(predictions), len(actuals['date'].unique())))

nse_dict, nse_sim_dict = {}, {}
mse_dict, mse_sim_dict = {}, {}
predictions_df = pd.DataFrame(columns=actuals.columns)
predictions_df['is_test_subbasin'] = False
predictions_df['is_val_subbasin'] = False
for subbasin in test_dataset.simulated_streamflow['subbasin'].unique():
    row, col = test_dataset.outlet_to_row_col[subbasin]
    
    station = None
    subbasin_sim = test_dataset.simulated_streamflow[test_dataset.simulated_streamflow['subbasin'] == subbasin].set_index('date')
    if subbasin in station_subbasins:
        station = subbasin_sim['StationID'].values[0]
        act = actuals[actuals['station'] == station].set_index('date')['runoff']
    if predictions.shape[0] != subbasin_sim.shape[0]:
        print('Warning: length of prediction {} and actuals {} does not match for subbasin {}. Ignoring excess actuals.'.format(len(predictions), len(subbasin_sim), subbasin))
        subbasin_sim = subbasin_sim.iloc[:predictions.shape[0]]
        if station is not None:
            act = act.iloc[:predictions.shape[0]]
    pred = pd.DataFrame({'runoff': predictions[:,row,col]}, index=subbasin_sim.index)
    pred['subbasin'] = subbasin
    pred['station'] = station
    pred['is_test_subbasin'] = subbasin in test_subbasins
    pred['is_val_subbasin'] = subbasin in val_subbasins
    predictions_df = predictions_df.append(pred.reset_index(), sort=True)
    nse_sim, mse_sim = evaluate.evaluate_daily('Sub{}'.format(subbasin), pred['runoff'], subbasin_sim['simulated_streamflow'], writer=writer)
    nse_sim_dict[subbasin] = nse_sim
    mse_sim_dict[subbasin] = mse_sim

    if station is not None:
        nse, mse = evaluate.evaluate_daily(station, pred['runoff'], act, writer=writer)
        nse_dict[subbasin] = nse
        mse_dict[subbasin] = mse
        print(station, subbasin, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')
    print(subbasin, '\tNSE sim:', nse_sim, '\tMSE sim:', mse_sim)


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


1 	NSE sim: -1.1545880466400607 	MSE sim: 2492703.0929776714
2 	NSE sim: 0.08831216730787173 	MSE sim: 77.38177223957061
3 	NSE sim: 0.47464920108298947 	MSE sim: 16.71769097254955
4 	NSE sim: 0.5118290828780538 	MSE sim: 8.31819580917476
5 	NSE sim: 0.5451207350392842 	MSE sim: 11.03284810746894
6 	NSE sim: 0.3029317848550579 	MSE sim: 4.044977950046006
7 	NSE sim: -3.385747748416491 	MSE sim: 12.16001275515533
8 	NSE sim: -0.5670163200561762 	MSE sim: 11.92889964053241
9 	NSE sim: -0.22158544616439446 	MSE sim: 4.9585642292008805
10 	NSE sim: 0.24494330631074612 	MSE sim: 4.303698756972045
11 	NSE sim: 0.39821323122179153 	MSE sim: 7.343689468758133
12 	NSE sim: -0.1850846945331479 	MSE sim: 3.4874616424821285
13 	NSE sim: -4.910840772555695 	MSE sim: 22.684688259716953
14 	NSE sim: 0.07504524152351144 	MSE sim: 248.1718175784851
15 	NSE sim: -0.2772871339712728 	MSE sim: 153.9471104715012
16 	NSE sim: 0.5101844077007625 	MSE sim: 7.166495984160847
17 	NSE sim: 0.021075490468694835 	

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


499 	NSE sim: -inf 	MSE sim: 11.357397766811044
500 	NSE sim: 0.31702714402467014 	MSE sim: 8.012843910073018
501 	NSE sim: -199.5236780024182 	MSE sim: 7.98654103806991
502 	NSE sim: -53.719140684119424 	MSE sim: 5.704609811211811
503 	NSE sim: -273.3827674048344 	MSE sim: 7.40477689041069
504 	NSE sim: -1281.2740168191967 	MSE sim: 8.629908210979524
505 	NSE sim: -0.3189625151254798 	MSE sim: 3.7506895481980855
506 	NSE sim: 0.27248068998648944 	MSE sim: 25.582618488941616
507 	NSE sim: -630.7623884107568 	MSE sim: 8.799632183313374
508 	NSE sim: -0.8854743357596535 	MSE sim: 4.913606467980236
509 	NSE sim: 0.11723654928907234 	MSE sim: 50.797397537095165
510 	NSE sim: -13.892914441110525 	MSE sim: 14.4050997939384
511 	NSE sim: -4.501059230317949 	MSE sim: 4.006978849694323
512 	NSE sim: -0.049361248949362446 	MSE sim: 245.96710779175694
513 	NSE sim: -0.005703112203607974 	MSE sim: 220.95066175600016
514 	NSE sim: -4.9187179355138 	MSE sim: 5.61345730067811
515 	NSE sim: -28.984873

  np.sum((evaluation - np.mean(evaluation)) ** 2, dtype=np.float64))


525 	NSE sim: -inf 	MSE sim: 15.44464391093015
526 	NSE sim: -53514.28819996097 	MSE sim: 20.78926727455814
527 	NSE sim: -936.5856260517377 	MSE sim: 15.373252746255456
528 	NSE sim: -75.56817256276979 	MSE sim: 5.171616423273571
529 	NSE sim: -1275.630306763914 	MSE sim: 13.159078639364761
530 	NSE sim: -0.03142450282516984 	MSE sim: 5.443481438687692
531 	NSE sim: -7.3442844705677715 	MSE sim: 9.973549436302024
532 	NSE sim: -49.19570633186014 	MSE sim: 16.498694666199167
533 	NSE sim: -11531.352530777922 	MSE sim: 22.655492401865644
534 	NSE sim: -11.694123835310158 	MSE sim: 7.760829173750524
535 	NSE sim: -20.244646246195384 	MSE sim: 6.603121685761254
536 	NSE sim: -1.412877617718094 	MSE sim: 6.46402612614632
537 	NSE sim: -1.5035715150443405 	MSE sim: 4.043204758508231
538 	NSE sim: 0.048083621070152405 	MSE sim: 162.95311700982992
539 	NSE sim: -540.5616116994095 	MSE sim: 10.52524893467164
540 	NSE sim: -0.24919945572996793 	MSE sim: 6513.853825559296
541 	NSE sim: 0.0906775

In [17]:
def print_nse_mse(name, nse_dict, mse_dict, subbasins):
    nses = list(nse_dict[s] for s in subbasins)
    mses = list(mse_dict[s] for s in subbasins)
    print(name, 'Median NSE (clipped to 0)', np.median(nses), '/ Min', np.min(nses), '/ Max', np.max(nses))
    print(name, 'Median MSE (clipped to 0)', np.median(mses), '/ Min', np.min(mses), '/ Max', np.max(mses))
    
    return np.median(nses)

print_nse_mse('Train sim', nse_sim_dict, mse_sim_dict, train_subbasins)
print_nse_mse('Val sim', nse_sim_dict, mse_sim_dict, val_subbasins)
nse_median_sim_test = print_nse_mse('Test sim', nse_sim_dict, mse_sim_dict, train_subbasins)
nse_median_stations_train_val = print_nse_mse('Stations (Train/Val)', nse_dict, mse_dict, list(s for s in station_subbasins if s not in test_subbasins))
nse_median_stations_test = print_nse_mse('Stations (Test)', nse_dict, mse_dict, list(s for s in station_subbasins if s in test_subbasins))
nse_median_stations = print_nse_mse('Stations (Train/Val/Test)', nse_dict, mse_dict, station_subbasins)

writer.add_scalar('nse_median_sim', nse_median_sim_test)
writer.add_scalar('nse_median_stations_test', nse_median_stations_test)
writer.add_scalar('nse_median_stations_all', nse_median_stations)

Train sim Median NSE (clipped to 0) -0.14214997088846437 / Min -53514.28819996097 / Max 0.5451207350392842
Train sim Median MSE (clipped to 0) 13.395814371229537 / Min 2.572336793614823 / Max 2492703.0929776714
Val sim Median NSE (clipped to 0) -0.3166535417485715 / Min -1611.6179784987671 / Max 0.43548663174155766
Val sim Median MSE (clipped to 0) 17.65063084778526 / Min 4.043204758508231 / Max 50121.991995750795
Test sim Median NSE (clipped to 0) -0.14214997088846437 / Min -53514.28819996097 / Max 0.5451207350392842
Test sim Median MSE (clipped to 0) 13.395814371229537 / Min 2.572336793614823 / Max 2492703.0929776714
Stations (Train/Val) Median NSE (clipped to 0) 0.015084172416404251 / Min -1.0819934980868178 / Max 0.3181235078061396
Stations (Train/Val) Median MSE (clipped to 0) 161.12531434066113 / Min 13.202358138027764 / Max 94270.16352833122
Stations (Test) Median NSE (clipped to 0) 0.05022017098017273 / Min -0.5025486278274225 / Max 0.24597879256356803
Stations (Test) Median MS

In [18]:
nse_dict

{676: 0.03915351718363025,
 677: -0.4442304719671286,
 678: 0.08890005004290635,
 680: 0.22458897446594106,
 681: 0.25208261724693337,
 682: 0.21030184179618272,
 683: 0.24597879256356803,
 684: -0.057818538905360484,
 685: -0.1553294690629412,
 686: 0.3181235078061396,
 687: -0.692281584967285,
 688: 0.1795099226873933,
 689: -0.8003278606476607,
 690: 0.15310863760115145,
 691: -0.32119187100991464,
 692: -0.29563918510197795,
 693: -0.20804778322001805,
 694: 0.13601995760281316,
 695: 0.17941383664846944,
 696: 0.0028225295204803436,
 697: -0.5025486278274225,
 698: 0.037440222206885365,
 699: 0.1541377652424858,
 700: 0.18314574543099382,
 701: 0.2821003846596646,
 702: -0.010820134358416222,
 703: 0.1311963984361113,
 704: -0.13351775689219436,
 705: 0.1596293682318719,
 706: 0.11577527728281445,
 707: -1.0819934980868178,
 709: -0.31546595545516176,
 710: 0.02734581531232816,
 712: 0.17110413522499912,
 713: -0.3314853939145488,
 714: -0.019465913256695666,
 715: -0.106368822439

In [19]:
writer.close()

In [20]:
save_df = pd.merge(predictions_df.rename({'runoff': 'prediction'}, axis=1), 
                   train_dataset.simulated_streamflow, on=['date', 'subbasin'])
save_df = pd.merge(save_df, actuals.rename({'runoff': 'actual'}, axis=1), how='left', on=['date', 'station'])\
            [['date', 'subbasin', 'station', 'prediction', 'actual', 'simulated_streamflow', 'is_test_subbasin', 'is_val_subbasin']]
load_data.pickle_results('ConvLSTM_simulationTraining', save_df, time_stamp)

'ConvLSTM_simulationTraining_20190814-152847.pkl'

In [21]:
_ = print(train_subbasins), print(val_subbasins), print(test_subbasins)

[1, 3, 4, 5, 6, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 42, 43, 45, 46, 47, 48, 52, 54, 55, 57, 58, 59, 62, 63, 64, 66, 67, 70, 73, 74, 75, 78, 79, 81, 82, 84, 85, 87, 88, 89, 91, 92, 94, 95, 96, 98, 99, 100, 101, 102, 103, 105, 108, 109, 110, 111, 112, 113, 115, 116, 117, 118, 120, 121, 122, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 144, 145, 147, 148, 149, 150, 152, 153, 154, 155, 156, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 174, 175, 177, 178, 179, 180, 181, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 202, 203, 204, 206, 208, 209, 211, 212, 213, 214, 217, 218, 219, 220, 221, 223, 224, 225, 226, 227, 228, 229, 231, 233, 234, 235, 237, 238, 239, 241, 242, 243, 246, 249, 250, 251, 254, 255, 257, 260, 261, 262, 263, 265, 266, 270, 271, 273, 274, 276, 277, 278, 279, 280, 281, 282, 283, 285, 286, 288, 289, 290, 291, 292, 293, 295, 296, 297, 298, 

In [22]:
list(k for k in list(nse_dict.keys()) if k in test_subbasins)

[683, 688, 697, 698, 700, 709, 716, 717, 718, 720]

In [23]:
datetime.now().strftime('%Y%m%d-%H%M%S')

'20190814-185217'