This notebook trains one XGBoost model per station to predict streamflow. Forcings are aggregated from hourly to daily values to match the resolution of the streamflow data.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.decomposition import PCA
from sklearn import metrics, svm, neural_network, ensemble
from datetime import datetime, timedelta
import hydroeval
import netCDF4 as nc
import xgboost as xgb
from src import load_data, evaluate

# Seed NumPy's global random number generator so NumPy-based randomness is repeatable.
np.random.seed(0)

# Run identifier in YYYYMMDD-HHMMSS form; it is passed to the pickle helpers
# further down so results and models of one run share the same suffix.
time_stamp = f'{datetime.now():%Y%m%d-%H%M%S}'
time_stamp

'20190703-093045'

In [2]:
# Load the per-station data. The result is used below as a mapping from station id
# to a table that is sliced by date with .loc and has 'station' and 'runoff' columns.
# NOTE(review): presumably pandas DataFrames with a DatetimeIndex and daily-aggregated
# forcings as the remaining columns -- confirm against src/load_data.
station_data_dict = load_data.load_train_test_gridded_aggregatedForcings()

In [None]:
# Create test and train splits for each station (by time), then create predictions for each subbasin.
# One XGBoost regressor is fit per station on the training window and used to predict
# runoff for every test-window day that has an observed runoff value.
history = 7
# Training starts history + 1 days after 2010-01-01.
# NOTE(review): presumably so that lagged forcing features are complete -- confirm.
train_start = datetime.strptime('2010-01-01', '%Y-%m-%d') + timedelta(days=history + 1)
train_end = '2013-12-31'
# The test window begins the day after the training window ends.
test_start = datetime.strptime(train_end, '%Y-%m-%d') + timedelta(days=1)
test_end = '2014-12-31'

predictions = {}  # station -> single-column ('runoff') DataFrame of predicted values
actuals = {}      # station -> Series of observed runoff on the same index
models = {}       # station -> fitted XGBRegressor
for station, station_data in station_data_dict.items():
    print(station)
    station_train = station_data.loc[train_start : train_end]
    station_test = station_data.loc[test_start : test_end]

    # Only test days with an observed runoff can be evaluated. Check this before
    # fitting so that no model is trained for a station that is skipped anyway.
    station_test = station_test[station_test['runoff'].notna()]
    if station_test.empty:
        print('Skipping', station)
        continue

    # NOTE(review): training rows are not filtered for missing runoff -- confirm the
    # training window never contains NaN targets.
    m = xgb.XGBRegressor(seed=123)
    m.fit(station_train.drop(['station', 'runoff'], axis=1), station_train['runoff'])

    # Predict from the same feature columns (everything except 'station' and 'runoff')
    # and keep the test index so predictions line up with the observations.
    test_features = station_test.drop(['station', 'runoff'], axis=1)
    predict = pd.DataFrame({'runoff': m.predict(test_features)}, index=station_test.index)

    predictions[station] = predict
    actuals[station] = station_test['runoff']
    models[station] = m

02GA010


  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \


02GA018
02GA038
02GA047
02GB001
02GB007
02GC002
02GC007
02GC010
02GC018
02GC026
02GD004
02GE007
02GG002
02GG003
02GG006
02GG009
02GG013
04159492
04159900
04160600
04161820
04164000
04165500
04166100
04166500
04174500
04176500
04177000
04193500


In [None]:
# Score every station's predictions against its observations and collect the
# two values returned by evaluate.evaluate_daily (unpacked here as NSE and MSE).
nse_list = []
mse_list = []
for station, predict in predictions.items():
    scores = evaluate.evaluate_daily(station, predict, actuals[station], plot=True)
    nse, mse = scores
    nse_list += [nse]
    mse_list += [mse]

    print(station, '\tNSE:', nse, '\tMSE:', mse, '(clipped to 0)')

# Summary over all stations: median, minimum and maximum of each metric.
for metric_name, metric_values in (('NSE', nse_list), ('MSE', mse_list)):
    print('Median ' + metric_name + ' (clipped to 0)', np.median(metric_values),
          '/ Min', np.min(metric_values), '/ Max', np.max(metric_values))

In [5]:
# Hand the (predictions, actuals) pair to the project's pickle helper, tagged with
# the experiment name and this run's time stamp.
load_data.pickle_results('XGBoost_VIC_aggregateForcings', (predictions, actuals), time_stamp)

'XGBoost_VIC_aggregateForcings_20190703-092630.pkl'

In [6]:
# Store each fitted per-station model via the project's pickle helper, using the
# same experiment name and time stamp as the pickled results.
for station, model in models.items():
    load_data.pickle_model(
        'XGBoost_VIC_aggregateForcings',
        model,
        station,
        time_stamp,
        model_type='xgb',
    )

Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GA010_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GA018_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GA038_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GA047_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GB001_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GB007_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GC002_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GC007_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GC010_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GC018_20190703-092630.pkl
Saved model as ../pickle/models/XGBoost_VIC_aggregateForcings_02GC026_20190703-092630.pkl
Saved mode