# Ensemble stacking
Combining models for better generalizing abilities. Will combine the predictions of the best LSTM model and the MLP model. The performance of the two show that they both do generally good, but that one model predicts better at one target than the other. It is therefore assumed that one model manages to extract some information that the other cannot in predicting specific targets. Overall generalizing abilities of the models can most likely be boosted by a technique called "ensemble stacking", where the two models predictions are combined into one final prediction. 

In [16]:
import sys, os
ROOT_PATH = os.path.abspath(".").split("src")[0]
module_path = os.path.abspath(os.path.join(ROOT_PATH+"/src/utils/"))
if module_path not in sys.path:
    sys.path.append(module_path)

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc 
from tabulate import tabulate

from sklearn.linear_model import LinearRegression

import functions as f
import dl_functions as dlf

In [17]:
# configure matplotlib params and plotting
sns.set()
sns.set_context('paper')
sns.set_style('whitegrid', {'axes.grid': True, 'grid.linestyle': '--'})
rc('figure', figsize=(12,6))
rc('xtick', labelsize=12)
rc('ytick', labelsize=12)
rc('axes', labelsize=13, titlesize=14)
rc('legend', fontsize=14, handlelength=2)
rc('font', family='serif')

In [18]:
# read data and metadata
df_train, df_valid, df_test = f.load_data()
stats, ts, ts_train, ts_valid, ts_test = f.load_metadata()

# split datasets into features and targets
x_train, y_train = f.split_dataset(df_train.values, delay=1)
x_valid, y_valid = f.split_dataset(df_valid.values, delay=1)
x_test, y_test = f.split_dataset(df_test.values, delay=1)

# metadata
target_tags = df_train.columns.values[:3]
feature_tags = df_train.columns.values[3:]
target_stds = stats.loc[target_tags,"Std"].values

In [19]:
def evaluate_error(mean_preds, std_preds, y_true, target_stds=target_stds):
    """
    Will evaluate the MAE of a set of predictions and targets.
    
    :param preds: Matrix of predictions with shape (n_obs, n_target_variables)
    :param targets: Matrix of true targets with shape (n_obs, n_target_variables)
    :param target_stdevs: 1D vector of the standard deviations of the target variables.
    
    :return return_dict: A dictionary with the computed error variables. 
    """
    maes = f.MAE(y_true, mean_preds, vector=True)
    maes_unstd = maes * target_stds
    
    expected_mean = np.mean(mean_preds, axis=0)
    expected_std = np.mean(std_preds, axis=0)

    # summarize in dataframe
    indexes = ["FT", "TT", "PT"]
    cols = ["MAE (std)", "MAE (unstd)", "Expect. Mean", "Expect. Stdev"]
    data = np.column_stack([maes, maes_unstd, expected_mean, expected_std])
    df = pd.DataFrame(data, index=indexes, columns=cols)
    df.loc["Avg"] = df.mean()
    
    str_table = tabulate(df, headers='keys', tablefmt='psql', floatfmt='.5f')

    return_dict = {
        'df': df,
        'str_table': str_table,
        'maes': maes,
        'maes_unstd': maes_unstd
    }

    return return_dict

In [21]:
def train_lm(lstm_model, mlp_model, x_train, y_train, n_pred=30):
    lstm_preds = dlf.predict_with_model(lstm_model, x_train, y_train, n_predictions=n_pred)[0]
    mlp_preds = dlf.predict_with_model(mlp_model, x_train, y_train, n_predictions=n_pred, input_dim=2)[0]
    
    Xtr = np.concatenate((lstm_preds, mlp_preds), axis=1)
    
    # find weights between the predictions of the LSTM model and the MLP by Linear Regr.
    lm = LinearRegression().fit(Xtr,y_train)
    return lm


def predict_lm(lm, lstm_model, mlp_model, X, y, n_pred=30, return_predictions=False):
    # predict with lstm
    lstm_means, lstm_stds, lstm_dict = dlf.predict_with_model(lstm_model, 
                                                              X, y, 
                                                              n_predictions=n_pred, 
                                                              input_dim=3)
    lstm_preds = lstm_dict['pred_matr']

    mlp_means, mlp_stds, mlp_dict = dlf.predict_with_model(mlp_model, 
                                                           X, y, 
                                                           n_predictions=n_pred, 
                                                           input_dim=2)
    mlp_preds = mlp_dict['pred_matr']

    lm_preds_matrix = np.zeros(shape=lstm_preds.shape)
    pred_stds = np.zeros(shape=lstm_preds[0].shape)
    pred_means = np.zeros(shape=lstm_preds[0].shape)

    for t in range(len(X)):
        row_x = np.concatenate((lstm_preds[:,t,:], mlp_preds[:,t,:]), axis=1)
        pred = lm.predict(row_x)

        lm_preds_matrix[:,t,:] = pred
        pred_means[t,:] = np.mean(pred, axis=0)
        pred_stds[t,:] = np.std(pred, axis=0)
    
    if return_predictions: 
        return pred_means, pred_stds, lm_preds_matrix
    else:
        return pred_means, pred_stds
    
    
def evaluate_ensemble(pred_model, lstm_model, mlp_model, train_tuple, 
                      val_tuple, test_tuple, n_pred=30):
    lm = train_lm(lstm_model, mlp_model, train_tuple[0], train_tuple[1], n_pred)
    
    val_preds, val_stds = pred_model(lm, lstm_model, mlp_model, val_tuple[0], val_tuple[1], n_pred)
    val_errs = evaluate_error(val_preds, val_stds, val_tuple[1])
    
    print("\n **** VALIDATION **** ")
    print(val_errs['str_table'])
    
    test_preds, test_stds = pred_model(lm, lstm_model, mlp_model, test_tuple[0], test_tuple[1], n_pred)
    test_errs = evaluate_error(test_preds, test_stds, test_tuple[1])
    
    print("\n **** TEST **** ")
    print(test_errs['str_table'])
    
    return val_errs, test_errs

In [7]:
%%capture
# load models
lstm_model = dlf.load_keras_model(ROOT_PATH + "models/lstm_128/50/")
mlp_model = dlf.load_keras_model(ROOT_PATH + "models/mlp_1024/50/")

In [8]:
n_pred = 300
val_lm, test_lm = evaluate_ensemble(predict_lm, 
                                    lstm_model, 
                                    mlp_model, 
                                    (x_train, y_train), 
                                    (x_valid, y_valid), 
                                    (x_test, y_test), 
                                    n_pred=n_pred)


 **** VALIDATION **** 
+-----+-------------+---------------+----------------+-----------------+
|     |   MAE (std) |   MAE (unstd) |   Expect. Mean |   Expect. Stdev |
|-----+-------------+---------------+----------------+-----------------|
| FT  |     0.58246 |    1966.76916 |       -0.26479 |         0.21083 |
| TT  |     0.28140 |       0.27479 |        1.29391 |         0.25039 |
| PT  |     0.40585 |       0.10044 |        0.01516 |         0.18565 |
| Avg |     0.42323 |     655.71480 |        0.34809 |         0.21563 |
+-----+-------------+---------------+----------------+-----------------+

 **** TEST **** 
+-----+-------------+---------------+----------------+-----------------+
|     |   MAE (std) |   MAE (unstd) |   Expect. Mean |   Expect. Stdev |
|-----+-------------+---------------+----------------+-----------------|
| FT  |     0.51099 |    1725.43436 |       -0.38791 |         0.22157 |
| TT  |     0.26899 |       0.26267 |        1.74810 |         0.28109 |
| PT  |  

In [11]:
ensemble_dict = {
    "validation": val_lm,
    "test": test_lm
}

# Error summary
dicts = [ensemble_dict]
columns=["FT", "TT", "PT", "Avg"]
index = ["LSTM/MLP"]
summary, tex = dlf.get_df_from_dicts(dicts, columns, index)
print(tex)
print(summary)
print()

# Uncertainty summary
columns=["MAE", "Exp. Mean", "Exp. Std"]
index = ["LSTM/MLP"]
levels = ["FT","TT","PT","Avg"]
ensemble_unc, tex_unc = dlf.get_uncertainty_df_from_dicts(dicts, columns, index, levels)
print(tex_unc)
print(ensemble_unc)
print()

\begin{tabular}{lcccccccc}
\toprule
{} & \multicolumn{4}{c}{Validation} & \multicolumn{4}{c}{Test} \\
{} &         FT &      TT &      PT &     Avg &     FT &     TT &      PT &     Avg \\
\midrule
\textbf{LSTM/MLP} &     0.5825 &  0.2814 &  0.4058 &  0.4232 &  0.511 &  0.269 &  0.3856 &  0.3885 \\
\bottomrule
\end{tabular}

         Validation                           Test                       
                 FT      TT      PT     Avg     FT     TT      PT     Avg
LSTM/MLP     0.5825  0.2814  0.4058  0.4232  0.511  0.269  0.3856  0.3885

\begin{tabular}{llcccccc}
\toprule
         &     & \multicolumn{3}{c}{Validation} & \multicolumn{3}{c}{Test} \\
         &     &        MAE & Exp. Mean & Exp. Std &     MAE & Exp. Mean & Exp. Std \\
\midrule
\multirow{4}{*}{\textbf{LSTM/MLP}} & \textbf{FT} &     0.5825 &   -0.2648 &   0.2108 &  0.5110 &   -0.3879 &   0.2216 \\
         & \textbf{TT} &     0.2814 &    1.2939 &   0.2504 &  0.2690 &    1.7481 &   0.2811 \\
         & \textbf{PT} & 

#### Export results

In [12]:
path = ROOT_PATH + "models/dataframes/"
summary.to_pickle(path + "ensemble_summary_df.pkl")
ensemble_unc.to_pickle(path + "ensemble_uncertainty_df.pkl")

In [None]:
f.save_pickle(lm, ROOT_PATH + "models/ensemble/linear_model.pkl")
lstm_model.save(ROOT_PATH + "models/ensemble/lstm_model.h5")
mlp_model.save(ROOT_PATH + "models/ensemble/mlp_model.h5")

### Example

In [32]:
# load models
lstm = keras.models.load_model(ROOT_PATH + "models/ensemble/lstm_model.h5")
mlp = keras.models.load_model(ROOT_PATH + "models/ensemble/mlp_model.h5")
lm = f.load_pickle(ROOT_PATH + "models/ensemble/linear_model.pkl")

In [33]:
n_pred = 100
mean_preds, std_preds, pred_matr = dlf.predict_with_ensemble(lm, lstm, mlp, 
                                                             x_test, y_test, 
                                                             n_pred=n_pred, 
                                                             return_pred_matr=True)

AttributeError: module 'dl_functions' has no attribute 'predict_with_ensemble'

In [None]:
dlf.plot_pred_matrix(pred_matr, x_test, y_test, ts_test, target_tags,
                    start_idx=600, n_obs=200, plotCI=True, z=1.645)