# Ensemble stacking
Combining models for better generalizing abilities. Will combine the predictions of the best LSTM model and the MLP model. The performance of the two show that they both do generally good, but that one model predicts better at one target than the other. It is therefore assumed that one model manages to extract some information that the other cannot in predicting specific targets. Overall generalizing abilities of the models can most likely be boosted by a technique called "ensemble stacking", where the two models predictions are combined into one final prediction. 

In [39]:
import sys, os
ROOT_PATH = os.path.abspath(".").split("src")[0]
module_path = os.path.abspath(os.path.join(ROOT_PATH+"/src/utils/"))
if module_path not in sys.path:
    sys.path.append(module_path)

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc 
from tabulate import tabulate

from sklearn.linear_model import LinearRegression

import functions as f
import dl_functions as dlf

In [40]:
# configure matplotlib params and plotting
sns.set()
sns.set_context('paper')
sns.set_style('whitegrid', {'axes.grid': True, 'grid.linestyle': '--'})
rc('figure', figsize=(12,6))
rc('xtick', labelsize=12)
rc('ytick', labelsize=12)
rc('axes', labelsize=13, titlesize=14)
rc('legend', fontsize=14, handlelength=2)
rc('font', family='serif')

In [41]:
# read data and metadata
df_train, df_valid, df_test = f.load_data()
stats, ts, ts_train, ts_valid, ts_test = f.load_metadata()

# split datasets into features and targets
x_train, y_train = f.split_dataset(df_train.values, delay=1)
x_valid, y_valid = f.split_dataset(df_valid.values, delay=1)
x_test, y_test = f.split_dataset(df_test.values, delay=1)

# metadata
target_tags = df_train.columns.values[:3]
feature_tags = df_train.columns.values[3:]
target_stds = stats.loc[target_tags,"Std"].values

In [52]:
def evaluate_error(preds, targets, target_stdevs=target_stds):
    """
    Will evaluate the MAE of a set of predictions and targets.
    
    :param preds: Matrix of predictions with shape (n_obs, n_target_variables)
    :param targets: Matrix of true targets with shape (n_obs, n_target_variables)
    :param target_stdevs: 1D vector of the standard deviations of the target variables.
    
    :return return_dict: A dictionary with the computed error variables. 
    """
    
    maes = f.MAE(targets, preds, vector=True)
    maes_unstd = (maes * target_stdevs)

    err_df = pd.DataFrame(np.column_stack([["FT", "TT", "PT"], maes, maes_unstd]), 
                          columns=['Tag', 'MAE (std)', 'MAE (unstd)'])
    err_df.loc["Avg"] = err_df.mean()
    
    str_table = tabulate(err_df, headers='keys', tablefmt='psql', floatfmt='.5f')

    return_dict = {
        'err_df': err_df,
        'err_table': str_table,
        'maes': maes,
        'maes_unstd': maes_unstd
    }
    
    return return_dict

In [53]:
def train_lm(lstm_model, mlp_model, X, y, n_pred=30):
    lstm_preds = dlf.predict_with_model(lstm_model, X, y, n_predictions=n_pred)[0]
    mlp_preds = mlp_model.predict(X)
    
    Xtr = np.concatenate((lstm_preds, mlp_preds), axis=1)
    
    # find weights between the predictions of the LSTM model and the MLP by Linear Regr.
    lm = LinearRegression().fit(Xtr,y)
    return lm

def predict(lm, lstm_model, mlp_model, X, y, n_pred=30):
    
    lstm_preds = dlf.predict_with_model(lstm_model, X, y, n_predictions=n_pred)[0]
    mlp_preds = mlp_model.predict(X)
    
    Xtr = np.concatenate((lstm_preds, mlp_preds), axis=1)
    
    return lm.predict(Xtr)

def evaluate_ensemble(lstm_model, mlp_model, train_tuple, val_tuple, test_tuple, n_pred=30):
    lm = train_lm(lstm_model, mlp_model, train_tuple[0], train_tuple[1], n_pred)
    
    val_preds = predict(lm, lstm_model, mlp_model, val_tuple[0], val_tuple[1], n_pred)
    val_errs = evaluate_error(val_tuple[1], val_preds)
    
    test_preds = predict(lm, lstm_model, mlp_model, test_tuple[0], test_tuple[1], n_pred)
    test_errs = evaluate_error(test_tuple[1], test_preds)
    
    print("\n **** VALIDATION **** ")
    print(val_errs['err_table'])
    
    print("\n **** TEST **** ")
    print(test_errs['err_table'])
    
    return (val_preds, val_errs), (test_preds, test_errs)

In [54]:
%%capture
# load models
lstm_model = dlf.load_keras_model(ROOT_PATH + "models/lstm_128/50/")
mlp_model = dlf.load_keras_model(ROOT_PATH + "models/mlp_512/50/")

In [55]:
n_pred = 30
val_ensemble, test_ensemble = evaluate_ensemble(lstm_model, 
                                                mlp_model, 
                                                (x_train, y_train), 
                                                (x_valid, y_valid), 
                                                (x_test, y_test), 
                                                n_pred=n_pred)


 **** VALIDATION **** 
+-----+-------+-------------+---------------+
|     | Tag   |   MAE (std) |   MAE (unstd) |
|-----+-------+-------------+---------------|
| 0   | FT    |     0.58224 |    1966.05145 |
| 1   | TT    |     0.30202 |       0.29493 |
| 2   | PT    |     0.40426 |       0.10005 |
| Avg | nan   |   nan       |     nan       |
+-----+-------+-------------+---------------+

 **** TEST **** 
+-----+-------+-------------+---------------+
|     | Tag   |   MAE (std) |   MAE (unstd) |
|-----+-------+-------------+---------------|
| 0   | FT    |     0.51240 |    1730.20578 |
| 1   | TT    |     0.24929 |       0.24344 |
| 2   | PT    |     0.38778 |       0.09597 |
| Avg | nan   |   nan       |     nan       |
+-----+-------+-------------+---------------+


### Export results

In [None]:
val_df = pd.DataFrame(np.vstack(val_data), index = indexes, columns=columns)
test_df = pd.DataFrame(np.vstack(test_data), index = indexes, columns=columns)
summary_df = pd.concat([val_df, test_df], axis=1, keys=["Validation", "Test"])

In [None]:
def get_summary(val_dicts, test_dicts, columns, indexes, unstd=False, texpath=None, round_digits=4):
    """
    Method that concatenates validation and test dictionaries (as obtained by evaluate_error()) into a dataframe.
    Is typically used to get a summary of multiple methods and their performance on test and validation data. 
    
    :param val_dicts: Validation dictionaries as obtained by evalueate_error()
    :param test_dicts: Test dictionaries as obtained by evaluate_error()
    :param columns: Vector of column names for the dataframe
    :param indexes: Vector of index names for the dataframe
    :param unstd: Boolean if the MAEs should be unstandardized or not. Default=False. 
    :param texpath: Path to save the obtained latex output. Default=None. 
    :round_digit: The significant digits to round a decimal to, used in format_digit(). Default=4. 
    
    :return: The summary dataframe and the latex string of the dataframe. 
    """
    
    name = 'maes_unstd' if unstd else 'maes'
    val_data = []
    for i, d in enumerate(val_dicts):
        tmp = np.append(d[name], d['avg_mae'])
        tmp = [format_digit(digit) for digit in tmp]
        val_data.append(tmp)

    test_data = []
    for i, d in enumerate(test_dicts):
        tmp = np.append(d[name], d['avg_mae'])
        tmp = [format_digit(digit) for digit in tmp]
        test_data.append(tmp)

    val_df = pd.DataFrame(np.vstack(val_data), index = indexes, columns=columns)
    test_df = pd.DataFrame(np.vstack(test_data), index = indexes, columns=columns)
    summary_df = pd.concat([val_df, test_df], axis=1, keys=["Validation", "Test"])
    
    tex = latexify(summary_df)

    if texpath is not None: # save the file
        with open(texpath, 'w+') as f:
            f.write(tex)
    
    return summary_df, tex

In [51]:
val_ensemble[1]['err_df']

Unnamed: 0,Tag,MAE (Standardized),MAE (Unstandardized)
0,FT,0.5829793440798184,1968.5339519571417
1,TT,0.2794662369312663,0.2729062845089852
2,PT,0.4055721406955908,0.100373887389097
3,Avg,0.422673,656.302
