In [1]:
## standard library
import os
import sys
from math import sqrt

## numerics, plotting and spreadsheet I/O
import numpy as np # linear algebra
import matplotlib.pyplot as plt # picture
import openpyxl

## pandas data processing
import pandas as pd
from pandas import read_csv
from pandas import DataFrame
from pandas import concat

## keras neural network
import tensorflow as tf
import keras
from keras import Input, Model, optimizers
import keras.backend as K
from keras.layers.core import *
from keras.layers.recurrent import LSTM
from keras.layers.normalization import BatchNormalization, LayerNormalization
from keras.models import *
from keras.optimizers import adam_v2 # adam is not available
# imported explicitly (after the wildcard imports) instead of relying on them to provide these layers
from keras.layers import GRU, Multiply

## sklearn data processing and cross-validation
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit

In [None]:
# reshape a two-dimensional table (row, column) into three-dimensional windows (sample, feature, window position)
def shape_trans(data, input_window, output_window):
    
    """
    Objective
    ---------
    Cut raw/sequential data (two dimensions) into sliding windows whose shape matches the deep learning model (three dimensions)

    Parameters
    ----------
    data : DataFrame with a 'class' column (1 is treated as wet, any other value as dry).
           Every column from position 2 onwards is used as an input feature and the column
           at position 2 is also used as the target.
    input_window : The window length of DL input (X)
    output_window : The window length of DL output (Y) - the RR simulation is '1', the multi-pred is '7'

    Route-Params
    ------------
    X : DL input with meteorological and hydrological features
    Y : DL output with future runoff
    id_total : Index of Y
    X_wet, Y_wet, id_wet : Wet period (determined by BPX method) X, Y, index of Y
    X_dry, Y_dry, id_dry : Dry period (determined by BPX method) X, Y, index of Y

    Returns
    -------
    x, X_wet, X_dry : Three dimension cells, shape (samples, features, input_window)
    y, Y_wet, Y_dry : Two dimension cells, shape (samples, output_window)
    id_total, id_wet, id_dry : index label of the first target row of each sample

    Notes
    -----
    - Rows are addressed by position only, so any index (not just a contiguous integer one) works;
      the index is used solely to label the samples.
    - Every complete window is produced, including the one whose target ends on the last row.
    - A window goes to the wet/dry subsets only when the 'class' of its first input row equals the
      'class' of its first target row; other windows appear in the full set only.
    - When no window fits, the corresponding outputs are empty one-dimensional arrays.
    
    """
    
    # materialise the columns once instead of slicing the DataFrame inside the loop
    features = data.iloc[:, 2:].to_numpy()
    target = data.iloc[:, 2].to_numpy()
    period = data['class'].to_numpy()
    labels = data.index
    
    X_wet, Y_wet, id_wet = [], [], []
    X_dry, Y_dry, id_dry = [], [], []
    X, Y, id_total = [], [], []
    
    # the last admissible start position is n_rows - input_window - output_window (inclusive)
    n_windows = data.shape[0] - input_window - output_window + 1
    
    for start in range(max(n_windows, 0)):
        split = start + input_window # position of the first target row
        window_x = features[start:split].T # (features, input_window)
        window_y = target[split:split + output_window]
        label = labels[split]
        
        X.append(window_x)
        Y.append(window_y)
        id_total.append(label)
        
        # windows that straddle a wet/dry change are not used for either subset
        if period[start] != period[split]:
            continue
        
        if period[split] == 1:
            X_wet.append(window_x)
            Y_wet.append(window_y)
            id_wet.append(label)
        else:
            X_dry.append(window_x)
            Y_dry.append(window_y)
            id_dry.append(label)
                
    X_wet = np.array(X_wet) # wet period x
    Y_wet = np.array(Y_wet) # wet period y
    X_dry = np.array(X_dry) # dry period x
    Y_dry = np.array(Y_dry) # dry period y
    x = np.array(X)
    y = np.array(Y)
    id_total = np.array(id_total)
    id_dry = np.array(id_dry) # dry period index
    id_wet = np.array(id_wet) # wet period index
    
    return x, y, X_wet, Y_wet, X_dry, Y_dry, id_total, id_wet, id_dry

# merge the wet and dry results back into one sequence
def wet_dry_comb(runoff_wet,runoff_dry,id_wet,id_dry):
    
    """
    Objective
    ---------
    Stitch the wet and dry runoff sub-sequences back together, ordered by their index

    Inputs
    ----------
    runoff_wet,runoff_dry : Wet/dry runoff sub-sequences (one row per sample)
    id_wet,id_dry : Index labels for the rows of runoff_wet,runoff_dry

    Outputs
    -------
    box_sort : DataFrame holding all rows of both sub-sequences, sorted by index
    
    """
    pieces = []
    for values, labels in ((runoff_wet, id_wet), (runoff_dry, id_dry)):
        frame = pd.DataFrame(values)
        frame.index = labels # relabel the rows with the supplied index
        pieces.append(frame)
    
    return pd.concat(pieces, axis=0).sort_index()

In [None]:
# attention
def attention_3d_block(inputs, name='attention_vec'):
    
    """
    Objective
    ---------
    Attention layer in neural network (Keras)

    Parameters
    ----------
    inputs : Three dimension Keras tensor; axis 1 must have a known (static) length
    name : Name given to the layer that outputs the attention weights. Layer names must be
           unique inside a model, so pass a different name when the block is used more than once
           or next to another layer already called 'attention_vec'.

    Returns
    -------
    Tensor with the same shape as `inputs`: the inputs multiplied element-wise by the attention weights
    
    """
    
    time_steps = int(inputs.shape[1])
    # swap axes 1 and 2 so the Dense layer (which acts on the last axis) mixes along axis 1 of `inputs`
    scores = Permute((2, 1))(inputs)
    scores = Dense(time_steps, activation='softmax')(scores)
    # swap back so the weights line up with `inputs`
    weights = Permute((2, 1), name=name)(scores)
    return Multiply()([inputs, weights])

# plot the loss
def loss_plot(hist, name):
    
    """
    Objective
    ---------
    Loss change among epochs plot

    Parameters
    ----------
    hist : Object with a `history` dict holding 'loss' and, optionally, 'val_loss' (as returned by Keras `fit`)
    name : Title of the figure

    Notes
    -----
    The validation curve is drawn only when 'val_loss' was recorded, and the legend lists exactly
    the curves that are drawn.
    
    """
    
    history = hist.history
    
    plt.plot(history['loss'],color='r')
    legend_labels = ['train_loss']
    
    if 'val_loss' in history:
        plt.plot(history['val_loss'],color='g')
        legend_labels.append('test_loss')
    
    plt.title(name)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(legend_labels, loc='upper left')
    plt.show()
    
# Combined loss function
def CombLoss(y_true, y_pred):
    
    """
    Objective
    ---------
    Combination of NSE (L2) and relative error (L1)

    Parameters
    ----------
    y_true : Observed values (tensor)
    y_pred : Predicted values (tensor)

    Returns
    -------
    Tensor with the shape of `y_true`: the weighted NSE-style term is a scalar, while the
    relative-error term divides the scalar mean absolute error element-wise by |y_true|.

    Notes
    -----
    Both denominators are clamped with the backend epsilon so that a target equal to zero
    (e.g. the minimum of a min-max scaled series) or a batch of constant targets does not
    produce an infinite or NaN loss.
    NOTE(review): the relative-error term is (mean absolute error) / |y_true|, not the mean of
    the per-sample relative errors - kept as in the original formulation; confirm this is intended.
    
    """
    
    a = 0.65 # weight of the NSE-style term; the relative-error term gets 1 - a
    eps = K.epsilon()
    
    up = K.mean(K.square(y_true - y_pred))
    down = K.maximum(K.mean(K.square(K.mean(y_true) - y_true)), eps)
    
    scale = K.maximum(K.abs(y_true), eps)
    bias = K.mean(K.abs(y_true - y_pred)) / scale
    
    return a * (up / down) + (1 - a) * bias

In [None]:
# LSTM
def LSTMPretrain(data_train, data_test, layer1_cell = 256, layer2_cell = 256, layer3_cell = 256, lrr_pre = 0.0005, lrr_tune = 0.0005, droprate = 0.3, epochs_pre = 300, epochs_tune = 500):
    
    """
    Objective
    ---------
    Pre-train an attention + LSTM network on all windows, then fine-tune two independent copies
    of the pre-trained network on the wet and on the dry windows

    Parameters
    ----------
    data_train, data_test : DataFrames in the layout expected by `shape_trans`
    layer1_cell, layer2_cell : Units of the first and second LSTM layer
    layer3_cell : Units of the third LSTM layer (see the review note in the body: this layer is
                  currently not connected to the output)
    lrr_pre, lrr_tune : Adam learning rate for pre-training / fine-tuning
    droprate : Rate of the spatial dropout
    epochs_pre, epochs_tune : Number of epochs for pre-training / fine-tuning

    Returns
    -------
    box_pred_tuned : Predictions of the fine-tuned wet and dry models on the test windows, recombined and sorted by index
    box_pred_pre : Predictions of the pre-trained model on the same windows
    box_act : Observed targets of the same windows

    Side effects
    ------------
    Shows three loss plots and writes the pre-trained model to 'modelbox/runoff_pretrain_model.h5'
    (the directory is created when missing).
    
    """
    
    input_window = 7
    output_window = 1 # multi-step prediction is '7'; RR simulation is '1'

    (x_train, y_train, x_wet_train, y_wet_train, x_dry_train, y_dry_train,
     _id_train, _id_wet_train, _id_dry_train) = shape_trans(data_train, input_window, output_window)
    (x_val, y_val, x_wet_val, y_wet_val, x_dry_val, y_dry_val,
     _id_val, id_wet_val, id_dry_val) = shape_trans(data_test, input_window, output_window)
    
    print('- -'*30)
    outputsize = y_train.shape[1]
    # NOTE(review): shape_trans transposes every window, so axis 1 of x_train is the feature axis and
    # axis 2 the position inside the window; the names below follow the original code - confirm the
    # intended axis order.
    time_steps = x_train.shape[1]
    input_vector = x_train.shape[2]

    inputs=Input(shape=(x_train.shape[1], x_train.shape[2],))

    dens = Dense(1024)(inputs)
    layer1 = BatchNormalization(trainable=True)(dens)
    attention_probs = Dense(1024, activation='softmax', name='attention_vec')(layer1)
    layer1 =  Multiply()([layer1, attention_probs])
    layer1 = LSTM(units=layer1_cell, input_shape=(time_steps, input_vector),return_sequences=True)(layer1)
    layer1 = BatchNormalization(trainable=True)(layer1)
    layer2 = LSTM(units=layer2_cell, return_sequences=True)(layer1)
    # NOTE(review): this third recurrent layer is built but never connected to `outputs` (the dropout
    # below consumes `layer2`), so it is not part of the trained model. Kept as in the original;
    # confirm whether it should feed the dropout instead.
    layer3 = LSTM(units=layer3_cell)(layer2)
    drop = SpatialDropout1D(droprate)(layer2)
    layer_flatten = Flatten()(drop)
    layer_dense = Dense(32)(layer_flatten)
    outputs = Dense(outputsize)(layer_dense)

    model = Model(inputs=inputs, outputs=outputs)
    adam = adam_v2.Adam(learning_rate = lrr_pre)
    model.compile(loss=CombLoss, optimizer=adam)

    # pre-training on all windows
    history = model.fit(x_train,y_train,batch_size=2048,epochs=epochs_pre,verbose=0,validation_data=(x_val,y_val))  
    loss_plot(history,name='Pre-training Loss')
    
    # predictions of the pre-trained model, kept as the baseline for the fine-tuned ones
    pred_1 = model.predict(x_wet_val)    
    pred_0 = model.predict(x_dry_val)
    
    modelname = 'runoff_pretrain_model.h5'
    model_path = 'modelbox/'+modelname
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model.save(model_path)
    
    def _fine_tune(x_sub_train, y_sub_train, x_sub_val, y_sub_val):
        """Fine-tune a fresh copy of the saved pre-trained model on one subset and return its predictions."""
        # reload from disk so that every subset starts from the pre-trained weights and the
        # fine-tuning of one subset cannot leak into the other
        tuned = load_model(model_path,custom_objects={'CombLoss':CombLoss})
        for layer in tuned.layers [:-7]:
            layer.trainable = False # freeze everything except the last seven layers
        # compile after changing `trainable` so the freeze takes effect
        tuned.compile(loss = CombLoss, optimizer = adam_v2.Adam(learning_rate = lrr_tune), metrics = [CombLoss])
        tune_history = tuned.fit(x_sub_train,y_sub_train,batch_size=2048,epochs=epochs_tune,verbose=0,validation_data=(x_sub_val,y_sub_val))
        loss_plot(tune_history,name='Fine-tuning Loss')
        return tuned.predict(x_sub_val)
    
    ## fine-tune_WET
    pred_wet = _fine_tune(x_wet_train, y_wet_train, x_wet_val, y_wet_val)
    
    ## fine-tune_DRY
    pred_dry = _fine_tune(x_dry_train, y_dry_train, x_dry_val, y_dry_val)
    
    box_pred_tuned = wet_dry_comb(pred_wet,pred_dry,id_wet_val,id_dry_val)
    box_pred_pre = wet_dry_comb(pred_1,pred_0,id_wet_val,id_dry_val)
    box_act = wet_dry_comb(y_wet_val,y_dry_val,id_wet_val,id_dry_val)
    
    return box_pred_tuned, box_pred_pre, box_act

In [None]:
#time series cross-validation (LSTM)
# `data` is expected to be defined by an earlier cell - TODO confirm
tscv = TimeSeriesSplit(n_splits = 4)
lstm_fold_results = [] # one (box_pred_tuned, box_pred_pre, box_act) tuple per fold, in fold order
for train_index, test_index in tscv.split(data):
    data_train, data_test = data.iloc[train_index,:], data.iloc[test_index,:]
    box_pred_tuned, box_pred_pre, box_act = LSTMPretrain(data_train, data_test)
    # keep every fold; the three names above only hold the most recent fold after the loop
    lstm_fold_results.append((box_pred_tuned, box_pred_pre, box_act))

In [None]:
# GRU
def GRUPretrain(data_train, data_test, layer1_cell = 256, layer2_cell = 256, layer3_cell = 256, lrr_pre = 0.0005, lrr_tune = 0.0005, droprate = 0.3, epochs_pre = 300, epochs_tune = 500):
    
    """
    Objective
    ---------
    Pre-train an attention + GRU network on all windows, then fine-tune two independent copies
    of the pre-trained network on the wet and on the dry windows

    Parameters
    ----------
    data_train, data_test : DataFrames in the layout expected by `shape_trans`
    layer1_cell, layer2_cell : Units of the first and second GRU layer
    layer3_cell : Units of the third GRU layer (see the review note in the body: this layer is
                  currently not connected to the output)
    lrr_pre, lrr_tune : Adam learning rate for pre-training / fine-tuning
    droprate : Rate of the spatial dropout
    epochs_pre, epochs_tune : Number of epochs for pre-training / fine-tuning

    Returns
    -------
    box_pred_tuned : Predictions of the fine-tuned wet and dry models on the test windows, recombined and sorted by index
    box_pred_pre : Predictions of the pre-trained model on the same windows
    box_act : Observed targets of the same windows

    Side effects
    ------------
    Shows three loss plots and writes the pre-trained model to 'modelbox/runoff_pretrain_model.h5'
    (the directory is created when missing).
    
    """
    
    input_window = 7
    output_window = 1 # multi-step prediction is '7'; RR simulation is '1'

    (x_train, y_train, x_wet_train, y_wet_train, x_dry_train, y_dry_train,
     _id_train, _id_wet_train, _id_dry_train) = shape_trans(data_train, input_window, output_window)
    (x_val, y_val, x_wet_val, y_wet_val, x_dry_val, y_dry_val,
     _id_val, id_wet_val, id_dry_val) = shape_trans(data_test, input_window, output_window)
    
    print('- -'*30)
    outputsize = y_train.shape[1]
    # NOTE(review): shape_trans transposes every window, so axis 1 of x_train is the feature axis and
    # axis 2 the position inside the window; the names below follow the original code - confirm the
    # intended axis order.
    time_steps = x_train.shape[1]
    input_vector = x_train.shape[2]

    inputs=Input(shape=(x_train.shape[1], x_train.shape[2],))

    dens = Dense(1024)(inputs)
    layer1 = BatchNormalization(trainable=True)(dens)
    attention_probs = Dense(1024, activation='softmax', name='attention_vec')(layer1)
    layer1 =  Multiply()([layer1, attention_probs])
    layer1 = GRU(units=layer1_cell, input_shape=(time_steps, input_vector),return_sequences=True)(layer1)
    layer1 = BatchNormalization(trainable=True)(layer1)
    layer2 = GRU(units=layer2_cell, return_sequences=True)(layer1)
    # NOTE(review): this third recurrent layer is built but never connected to `outputs` (the dropout
    # below consumes `layer2`), so it is not part of the trained model. Kept as in the original;
    # confirm whether it should feed the dropout instead.
    layer3 = GRU(units=layer3_cell)(layer2)
    drop = SpatialDropout1D(droprate)(layer2)
    layer_flatten = Flatten()(drop)
    layer_dense = Dense(32)(layer_flatten)
    outputs = Dense(outputsize)(layer_dense)

    model = Model(inputs=inputs, outputs=outputs)
    adam = adam_v2.Adam(learning_rate = lrr_pre)
    model.compile(loss=CombLoss, optimizer=adam)

    # pre-training on all windows
    history = model.fit(x_train,y_train,batch_size=2048,epochs=epochs_pre,verbose=0,validation_data=(x_val,y_val))  
    loss_plot(history,name='Pre-training Loss')
    
    # predictions of the pre-trained model, kept as the baseline for the fine-tuned ones
    pred_1 = model.predict(x_wet_val)    
    pred_0 = model.predict(x_dry_val)
    
    modelname = 'runoff_pretrain_model.h5'
    model_path = 'modelbox/'+modelname
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model.save(model_path)
    
    def _fine_tune(x_sub_train, y_sub_train, x_sub_val, y_sub_val):
        """Fine-tune a fresh copy of the saved pre-trained model on one subset and return its predictions."""
        # reload from disk so that every subset starts from the pre-trained weights and the
        # fine-tuning of one subset cannot leak into the other
        tuned = load_model(model_path,custom_objects={'CombLoss':CombLoss})
        for layer in tuned.layers [:-7]:
            layer.trainable = False # freeze everything except the last seven layers
        # compile after changing `trainable` so the freeze takes effect
        tuned.compile(loss = CombLoss, optimizer = adam_v2.Adam(learning_rate = lrr_tune), metrics = [CombLoss])
        tune_history = tuned.fit(x_sub_train,y_sub_train,batch_size=2048,epochs=epochs_tune,verbose=0,validation_data=(x_sub_val,y_sub_val))
        loss_plot(tune_history,name='Fine-tuning Loss')
        return tuned.predict(x_sub_val)
    
    ## fine-tune_WET
    pred_wet = _fine_tune(x_wet_train, y_wet_train, x_wet_val, y_wet_val)
    
    ## fine-tune_DRY
    pred_dry = _fine_tune(x_dry_train, y_dry_train, x_dry_val, y_dry_val)
    
    box_pred_tuned = wet_dry_comb(pred_wet,pred_dry,id_wet_val,id_dry_val)
    box_pred_pre = wet_dry_comb(pred_1,pred_0,id_wet_val,id_dry_val)
    box_act = wet_dry_comb(y_wet_val,y_dry_val,id_wet_val,id_dry_val)
    
    return box_pred_tuned, box_pred_pre, box_act

In [None]:
#time series cross-validation (GRU)
# `data` is expected to be defined by an earlier cell - TODO confirm
tscv = TimeSeriesSplit(n_splits = 4)
gru_fold_results = [] # one (box_pred_tuned, box_pred_pre, box_act) tuple per fold, in fold order
for train_index, test_index in tscv.split(data):
    data_train, data_test = data.iloc[train_index,:], data.iloc[test_index,:]
    box_pred_tuned, box_pred_pre, box_act = GRUPretrain(data_train, data_test)
    # keep every fold; the three names above only hold the most recent fold after the loop
    gru_fold_results.append((box_pred_tuned, box_pred_pre, box_act))