FF-neural network to predict streamflow. Forcings are aggregated from hourly to daily to match streamflow resolution. One model per station.

Use CV for more reliable NSE estimates

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn import metrics, svm, neural_network, ensemble
from datetime import datetime, timedelta
import hydroeval
import netCDF4 as nc
from src import load_data, evaluate

np.random.seed(0)
time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
time_stamp

'20190705-204655'

In [2]:
station_data_dict = load_data.load_train_test_gridded_aggregatedForcings(include_all_cells=True)

In [3]:
# Create test and train splits for each station (by time), then create predictions for each subbasin
history = 7
data_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=history + 1)
test_starts = [data_start, '2011-01-01', '2012-01-01', '2013-01-01', '2014-01-01']
test_ends = ['2010-12-31', '2011-12-31', '2012-12-31', '2013-12-31', '2014-12-31']

plot_list = ['04159492', '02GC002']
median_nse_mse_list = []
for cv_iter in range(len(test_starts)):
    test_start, test_end = test_starts[cv_iter], test_ends[cv_iter]
    print('Test: {} - {}'.format(test_start, test_end))
    
    predictions = {}
    actuals = {}
    models = {}
    nse_list, mse_list = [], []
    for station in list(station_data_dict.keys()):
        station_data = station_data_dict[station]
        station_data.dropna(how='all', axis=1, inplace=True)
        station_train = station_data[((station_data.index < test_start) | (station_data.index > test_end)) \
                                     & (station_data.index > data_start)].copy()
        station_test = station_data.loc[test_start : test_end]
        
        m = neural_network.MLPRegressor(max_iter=1000, learning_rate='adaptive', hidden_layer_sizes=(50,10,), random_state=123, verbose=True, early_stopping=True, n_iter_no_change=50)#Ridge(alpha=.5, random_state=123)
        m.fit(station_train.drop(['station', 'runoff'], axis=1), station_train['runoff'])

        station_test = station_test[~pd.isna(station_test['runoff'])]
        if len(station_test) == 0:
            print('Skipping', station)
            continue

        predict = pd.DataFrame(index=station_test.index)
        predict = predict.join(station_test.drop(['station', 'runoff'], axis=1))
        predict['runoff'] = np.nan
        predict['runoff'] = m.predict(predict.drop('runoff', axis=1))
        
        predictions['cv{}_{}'.format(cv_iter, station)] = predict[['runoff']]
        actuals['cv{}_{}'.format(cv_iter, station)] = station_test['runoff']
        models['cv{}_{}'.format(cv_iter, station)] = m
        
        nse, mse = evaluate.evaluate_daily(station, predict[['runoff']], station_test['runoff'], plot=station in plot_list)
        nse_list.append(nse)
        mse_list.append(mse)
    
    median_nse_mse_list.append((np.median(nse_list), np.median(mse_list)))
    print('Median NSE: {}\tMSE: {}'.format(*median_nse_mse_list[-1]))

Test: 2010-01-09 00:00:00 - 2010-12-31
Iteration 1, loss = 1619.07497570
Validation score: -0.557895
Iteration 2, loss = 412.32190890
Validation score: -0.781142
Iteration 3, loss = 265.42041918
Validation score: 0.102894
Iteration 4, loss = 196.75246051
Validation score: 0.077429
Iteration 5, loss = 178.16025969
Validation score: 0.016272
Iteration 6, loss = 167.47507262
Validation score: 0.068089
Iteration 7, loss = 147.29228690
Validation score: 0.196740
Iteration 8, loss = 133.93515801
Validation score: 0.220968
Iteration 9, loss = 122.22704925
Validation score: 0.269485
Iteration 10, loss = 120.74255857
Validation score: 0.280626
Iteration 11, loss = 113.82193531
Validation score: 0.249400
Iteration 12, loss = 120.37394407
Validation score: 0.217671
Iteration 13, loss = 123.83280192
Validation score: 0.236096
Iteration 14, loss = 130.30185152
Validation score: 0.164684
Iteration 15, loss = 136.17171329
Validation score: 0.263656
Iteration 16, loss = 118.17782415
Validation score: 


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


Iteration 1, loss = 2404.23166842
Validation score: -33.475827
Iteration 2, loss = 413.35110497
Validation score: -7.811809
Iteration 3, loss = 138.53952101
Validation score: -1.864638
Iteration 4, loss = 77.07850265
Validation score: -1.634960
Iteration 5, loss = 45.88769917
Validation score: -0.454559
Iteration 6, loss = 31.07231018
Validation score: -0.977358
Iteration 7, loss = 24.15911182
Validation score: -0.992776
Iteration 8, loss = 17.56169615
Validation score: -0.345389
Iteration 9, loss = 14.20221431
Validation score: 0.092389
Iteration 10, loss = 12.20982669
Validation score: 0.098696
Iteration 11, loss = 10.40287759
Validation score: 0.201775
Iteration 12, loss = 9.84264344
Validation score: 0.210130
Iteration 13, loss = 9.55870902
Validation score: 0.219954
Iteration 14, loss = 9.23866098
Validation score: 0.244469
Iteration 15, loss = 9.16064184
Validation score: 0.225984
Iteration 16, loss = 9.31847053
Validation score: 0.237543
Iteration 17, loss = 9.11839924
Validatio

In [4]:
median_nse_mse_list

[(0.1310006320593745, 69.52536089262101),
 (0.2533265506511627, 181.5469023612652),
 (-0.3952021524279702, 62.23788990470602),
 (0.33110490010476057, 113.82647782398266),
 (0.25446939968836, 102.02172452933448)]

In [5]:
print('Median NSE: {}\t, MSE: {}'.format(np.median(list(zip(*median_nse_mse_list))[0]), np.median(list(zip(*median_nse_mse_list))[1])))

Median NSE: 0.2533265506511627	, MSE: 102.02172452933448


In [6]:
load_data.pickle_results('NN_VIC_aggregateForcings_CV', (predictions, actuals), time_stamp)

'NN_VIC_aggregateForcings_CV_20190705-204655.pkl'

In [7]:
for station, model in models.items():
    load_data.pickle_model('NN_VIC_aggregateForcings_CV', model, station, time_stamp, model_type='sklearn')

Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GA010_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GA018_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GA038_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GA047_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GB001_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GB007_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GC002_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GC007_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GC010_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GC018_20190705-204655.pkl
Saved model as ../pickle/models/NN_VIC_aggregateForcings_CV_cv4_02GC026_20190705