# Results

In [1]:
import os
import sys
import time as t
import pandas as pd
import numpy as np

# Setup for LaTeX export (see https://matplotlib.org/users/pgf.html). The backend must be selected before pyplot is imported.
def figsize(scale, fig_width_pt=469.755, aspect_ratio=None):
    r"""Return a ``[width, height]`` figure size in inches for LaTeX export.

    Parameters
    ----------
    scale : float
        Fraction of the document's text width the figure should occupy.
    fig_width_pt : float, optional
        Text width of the target document in TeX points (obtain it in LaTeX
        with ``\the\textwidth``). Defaults to 469.755.
    aspect_ratio : float, optional
        Height divided by width. ``None`` (the default) selects the golden
        mean, ``(sqrt(5) - 1) / 2``.

    Returns
    -------
    list
        ``[fig_width, fig_height]`` in inches.
    """
    inches_per_pt = 1.0/72.27                       # Convert pt to inch
    if aspect_ratio is None:
        aspect_ratio = (np.sqrt(5.0)-1.0)/2.0       # Golden mean as the default ratio
    fig_width = fig_width_pt*inches_per_pt*scale    # width in inches
    fig_height = fig_width*aspect_ratio             # height in inches
    return [fig_width, fig_height]

import matplotlib as mpl
# Select the pgf backend; this runs before `import matplotlib.pyplot` further down.
# NOTE(review): the `%matplotlib` magic later in this cell reports "Using matplotlib
# backend: TkAgg" in the recorded output, so confirm which backend is actually
# active when figures are rendered or saved.
mpl.use('pgf')
# rcParams overrides applied to every figure created in this notebook.
pgf_with_rc_fonts = {
    "text.usetex": True,                # let LaTeX typeset all figure text
    "font.family": "serif",
    "axes.labelsize": 10,               # LaTeX default is 10pt font.
    "font.size": 10,
    "legend.fontsize": 8,               # Make the legend/label fonts a little smaller
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "figure.figsize": figsize(0.9),     # default fig size of 0.9 textwidth
    # Disabled font overrides, kept for reference:
    #"font.serif": [],                   # use latex default serif font
    #"font.sans-serif": ["DejaVu Sans"], # use a specific sans-serif font
}
# Merge the overrides into matplotlib's global configuration.
mpl.rcParams.update(pgf_with_rc_fonts)

import matplotlib.pyplot as plt

from tabulate import tabulate

from keras.models import load_model
from keras import backend as K

from sklearn.metrics import mean_squared_error, mean_absolute_error

# Make the notebook's working directory importable so that the local
# `helper_functions` package can be found by the imports that follow.
module_path = os.path.abspath('')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from helper_functions import data
from helper_functions import lstm


from IPython.display import HTML
from IPython.display import display
%matplotlib 
#notebook
#mpl.rcParams['figure.figsize'] = (9,5)

# Load the standardized target series ('actual') and the ENTSOE forecast that serves
# as the benchmark the models' test-set forecasts are compared against.
# The feature matrix (df_X / split_X / X_test) is not loaded here; it is loaded per
# model inside the evaluation loop further down, so those lines stay disabled.
#df_X = data.load_data(typ='standardized', features=['all'])
df_y = data.load_data(typ='standardized', features=['actual'])
df_bench = data.load_data(typ='standardized', features=['entsoe'])
# Split each series with splits=[0.8]; per the recorded output this yields parts of
# 16184 and 4047 rows. Index 1 (the second part) is used as the test set below.
#split_X = data.split_series(series=df_X, mode='percentage', splits=[0.8])
split_y = data.split_series(series=df_y, mode='percentage', splits=[0.8])
split_bench = data.split_series(series=df_bench, mode='percentage', splits=[0.8])
# Index of the test part; used as the x-axis of the forecast plots.
time_test = split_y[1].index
#X_test = split_X[1].values
# Standardized test-set values of the actual load and of the benchmark forecast.
y_test = split_y[1].values
benchmark_test = split_bench[1].values

# Locations of the stored result tables and trained models of the earlier notebooks.
res_path = os.path.abspath('results/')
model_dir = os.path.abspath('models/')

# Date stamp that is part of every result file name.
date = '20170508'


def _load_results_csv(model_no, stem):
    """Read ``results/notebook_0<model_no>/model<model_no>_<stem><date>.csv``.

    The files are semicolon-delimited. ``stem`` carries its own trailing
    underscore where the file name has one ('results_' vs. 'test_results').
    """
    folder = 'notebook_0{}/'.format(model_no)
    file_name = 'model{}_{}{}.csv'.format(model_no, stem, date)
    return pd.read_csv(os.path.join(res_path, folder, file_name), delimiter=';')


# Validation results per model family, keyed 'm2', 'm3', 'm5', 'm6'.
# Model 4 is intentionally not loaded (its loading code was disabled in this notebook).
valid_results = {'m{}'.format(no): _load_results_csv(no, 'results_') for no in (2, 3, 5, 6)}

# Test-set results per model family (again without model 4).
test_results_02 = _load_results_csv(2, 'test_results')
test_results_03 = _load_results_csv(3, 'test_results')
test_results_05 = _load_results_csv(5, 'test_results')
test_results_06 = _load_results_csv(6, 'test_results')


# Load standardization params
# Both CSV files are read without a header row, so the column names are assigned
# by hand from this list.
# NOTE(review): `columns` is reassigned to a different list in the next cell; it
# only describes the parameter files up to that point.
columns = ['actual', 'entsoe_fc','bsl_t','brn_t','zrh_t','lug_t','lau_t','gen_t','stg_t','luz_t']
params_mu = pd.read_csv(os.path.join('Data', 'standardization_params_mu.csv'), header=None)
params_mu.columns = columns
params_sigma = pd.read_csv(os.path.join('Data', 'standardization_params_sigma.csv'), header=None)
params_sigma.columns = columns

# Parameters of the 'actual' column (first row of each file); the evaluation loop
# applies `value * sigma + mu` to convert standardized values back.
mu = params_mu.loc[0]['actual']
sigma = params_sigma.loc[0]['actual']

Using TensorFlow backend.


Using matplotlib backend: TkAgg
| Loaded dataset  | standardized                                                  |
| File path       | /Users/david/AnacondaProjects/STLF/Data/fulldataset_stand.csv |
| Loaded features | ['actual']                                                    |
| Dataset Shape   | (20231, 1)                                                    |
| Loaded dataset  | standardized                                                  |
| File path       | /Users/david/AnacondaProjects/STLF/Data/fulldataset_stand.csv |
| Loaded features | ['entsoe_fc']                                                 |
| Dataset Shape   | (20231, 1)                                                    |
| Original dataset shape  | (20231, 1) |
| 1. new dataset of shape | (16184, 1) |
| 2. new dataset of shape | (4047, 1)  |
| Original dataset shape  | (20231, 1) |
| 1. new dataset of shape | (16184, 1) |
| 2. new dataset of shape | (4047, 1)  |


In [2]:
# Column names of the per-model result tables shown below.
columns = [
    'Model name',
    'Mean absolute error',
    'Mean squared error',
    'Diff. MAE',
    'Diff. MSE',
]
# Filled by the following cells: model key (e.g. 'm2') -> [best model name, feature list].
best_models = dict()

In [3]:
# Feature-group identifiers; the same strings are passed as `features=` to
# data.load_data elsewhere in this notebook.
valid_features = [
    'all',
    'actual',
    'entsoe',
    'weather_t',
    'weather_i',
    'holiday',
    'weekday',
    'hour',
    'month',
]
   

# Model 2: ENTSOE Only

In [4]:
# Rank the model-2 candidates by test-set MAE, best (lowest) first.
test_results_02 = test_results_02.sort_values('Mean absolute error', ascending=True)
# sort_values keeps the original index labels, so the top-ranked row has to be taken
# by position (.iloc[0]); .loc[0] would return whichever row carries label 0,
# regardless of where it ended up after sorting.
best_models['m2'] = [test_results_02.iloc[0]['Model name'], ['entsoe']]
test_results_02

Unnamed: 0,Model name,Mean absolute error,Mean squared error,Diff. MAE,Diff. MSE
0,model2_274_l-50_l-50_d-0.1,0.328507,0.169947,-0.220801,-0.251682
1,model2_348_l-100_l-50_d-0.2,0.350244,0.190102,-0.199064,-0.231528
2,model2_416_l-150_l-50,0.360535,0.200661,-0.188773,-0.220968
3,model2_346_l-100_l-50_d-0.1,0.415458,0.249737,-0.13385,-0.171892
4,model2_420_l-150_l-50_d-0.2,0.588017,0.478164,0.038709,0.056534


# Model 3: Calendar only

In [5]:
# Rank the model-3 candidates by test-set MAE, best (lowest) first.
test_results_03 = test_results_03.sort_values('Mean absolute error', ascending=True)
# sort_values keeps the original index labels, so the top-ranked row has to be taken
# by position (.iloc[0]); .loc[0] would return whichever row carries label 0,
# regardless of where it ended up after sorting.
best_models['m3'] = [test_results_03.iloc[0]['Model name'], ['holiday', 'weekday', 'hour', 'month']]
test_results_03

Unnamed: 0,Model name,Mean absolute error,Mean squared error,Diff. MAE,Diff. MSE
0,model3_1_l-10,0.327025,0.167908,-0.222283,-0.253721
1,model3_9_l-10_l-20_d-0.2,0.332024,0.174649,-0.217284,-0.246981
2,model3_6_l-10_l-10_d-0.2,0.348046,0.189985,-0.201262,-0.231644
3,model3_92_l-30_l-20_d-0.1,0.372961,0.211488,-0.176346,-0.210142
4,model3_133_l-50_l-20_l-20,0.466532,0.311682,-0.082776,-0.109948


# Model 4: Weather only

In [6]:
#test_results_04 = test_results_04.sort_values('Mean absolute error', ascending=True)
#best_models['m4'] = [test_results_04.loc[0]['Model name'], ['weather_t', 'weather_i']]
#test_results_04

# Model 5: Weather + Calendar



In [7]:
# Rank the model-5 candidates by test-set MAE, best (lowest) first.
test_results_05 = test_results_05.sort_values('Mean absolute error', ascending=True)
# sort_values keeps the original index labels, so the top-ranked row has to be taken
# by position (.iloc[0]); .loc[0] would return whichever row carries label 0,
# regardless of where it ended up after sorting.
best_models['m5'] = [test_results_05.iloc[0]['Model name'], ['weather_t', 'weather_i', 'holiday', 'weekday', 'hour', 'month']]
test_results_05

Unnamed: 0,Model name,Mean absolute error,Mean squared error,Diff. MAE,Diff. MSE
0,model5_116_l-24_l-20_l-20_d-0.1,0.334264,0.178375,-0.215044,-0.243255
1,model5_128_l-24_l-50_l-20_d-0.1,0.342136,0.180466,-0.207171,-0.241164
2,model5_178_l-50_l-50_l-50,0.411285,0.245104,-0.138023,-0.176525
3,model5_227_l-150_l-50_l-50_d-0.1,0.413456,0.257196,-0.135852,-0.164433
4,model5_208_l-150_l-20_l-10,0.457963,0.303341,-0.091345,-0.118289


# Model 6: All available data

In [8]:
# Rank the model-6 candidates by test-set MAE, best (lowest) first.
test_results_06 = test_results_06.sort_values('Mean absolute error', ascending=True)
# sort_values keeps the original index labels, so the top-ranked row has to be taken
# by position (.iloc[0]); .loc[0] would return whichever row carries label 0,
# regardless of where it ended up after sorting.
best_models['m6'] = [test_results_06.iloc[0]['Model name'], ['all']]
test_results_06

Unnamed: 0,Model name,Mean absolute error,Mean squared error,Diff. MAE,Diff. MSE
0,model6_130_l-100_l-20_l-10,0.319502,0.156626,-0.229806,-0.265003
1,model6_124_l-100_l-10_l-20,0.322266,0.157856,-0.227042,-0.263774
2,model6_132_l-100_l-20_l-10_d-0.2,0.323,0.162509,-0.226308,-0.259121
3,model6_122_l-100_l-10_l-10_d-0.1,0.325133,0.167292,-0.224175,-0.254337
4,model6_103_l-50_l-50_l-10,0.339554,0.176718,-0.209754,-0.244911


In [9]:
# Evaluate the best model of every family on the test split and compare it with the
# ENTSOE benchmark forecast. Errors are computed after converting the standardized
# values back with `value * sigma + mu`; one figure is drawn per model.
import tensorflow as tf  # imported once here instead of on every loop iteration

# The converted target and benchmark do not depend on the model, so compute them once.
y_test_raw = np.round(y_test * sigma + mu)
benchmark_test_raw = np.round(benchmark_test * sigma + mu)

# One dict per model; the DataFrame is built once after the loop instead of growing
# it row by row with DataFrame.append (quadratic, and removed in pandas 2.0).
records = []

for m in best_models:
    # Load original config:
    config = valid_results[m].loc[valid_results[m]['model_name'] == best_models[m][0]]
    batch_size = int(config['batch_train'].values[0])

    notebook = 'notebook_0' + str(m[1:2])
    mod_name = config['model_name'].values[0]
    filename = os.path.join(model_dir, notebook, (mod_name + '.h5'))

    # Truncate the test set to a whole number of batches of this model's batch size.
    size = int(y_test.shape[0] / batch_size)
    n_used = size * batch_size

    # Load the feature columns this model was trained on and take the test part.
    df_X = data.load_data(typ='standardized', features=best_models[m][1])
    split_X = data.split_series(series=df_X, mode='percentage', splits=[0.8])
    X_test = split_X[1].values

    # Load LSTM Model
    model = load_model(filename)
    model.reset_states()
    predictions = lstm.get_predictions(model=model, X=X_test[0:n_used], batch_size=batch_size, timesteps=1, verbose=0)

    # Otherwise, we get a memory leak!
    K.clear_session()
    tf.reset_default_graph()

    predictions_raw = np.round(predictions * sigma + mu)

    # Benchmark errors are recomputed per model because n_used depends on batch_size.
    mse_entsoe = mean_squared_error(y_test_raw[0:n_used], benchmark_test_raw[0:n_used])
    mae_entsoe = mean_absolute_error(y_test_raw[0:n_used], benchmark_test_raw[0:n_used])

    mse = mean_squared_error(y_test_raw[0:n_used], predictions_raw)
    mae = mean_absolute_error(y_test_raw[0:n_used], predictions_raw)

    # Negative "Diff." values mean the model beats the benchmark.
    records.append({'Model name': mod_name[0:6],
                    'Mean squared error': mse, 'Mean absolute error': mae,
                    'Diff. MAE': mae - mae_entsoe, 'Diff. MSE': mse - mse_entsoe,
                    'ENTSOE MAE': mae_entsoe, 'ENTSOE MSE': mse_entsoe})

    time_vector = np.reshape(time_test.values[0:n_used], (n_used, 1))

    plt.figure()
    plt.plot(time_vector, benchmark_test_raw[:n_used], label='ENTSOE Forecast')
    plt.plot(time_vector, y_test_raw[:n_used], label='Actual Load')
    plt.plot(time_vector, predictions_raw, label='Model predictions')
    plt.title('LSTM Model: {}'.format(mod_name))
    plt.ylabel('Electricity load (in MW)')
    plt.xlabel('Date')
    plt.legend(loc='upper left')

    # Figure export is currently disabled:
    #filename = plot_dir + model_name + 'top_model_predictions'
    #plt.savefig(filename + '.pgf')
    #plt.savefig(filename + '.pdf')

# Explicit column order; an empty `best_models` still yields a frame with these columns.
results = pd.DataFrame(records, columns=['Model name', 'Mean absolute error', 'Mean squared error',
                                         'Diff. MAE', 'Diff. MSE', 'ENTSOE MAE', 'ENTSOE MSE'])

# The original `plt.show` lacked the call parentheses and therefore did nothing.
# Showing once after the loop displays all figures together.
plt.show()
    

| Loaded dataset  | standardized                                                  |
| File path       | /Users/david/AnacondaProjects/STLF/Data/fulldataset_stand.csv |
| Loaded features | ['entsoe_fc']                                                 |
| Dataset Shape   | (20231, 1)                                                    |
| Original dataset shape  | (20231, 1) |
| 1. new dataset of shape | (16184, 1) |
| 2. new dataset of shape | (4047, 1)  |
| Loaded dataset  | standardized                                                  |
| File path       | /Users/david/AnacondaProjects/STLF/Data/fulldataset_stand.csv |
| Loaded features | ['holiday', 'weekday', 'hour', 'month']                       |
| Dataset Shape   | (20231, 44)                                                   |
| Original dataset shape  | (20231, 44) |
| 1. new dataset of shape | (16184, 44) |
| 2. new dataset of shape | (4047, 44)  |
| Loaded dataset  | standardized                                               

In [10]:
# Order the models by their MAE difference to the ENTSOE benchmark; the most negative
# value (largest improvement over the benchmark) comes first.
results = results.sort_values('Diff. MAE', ascending=True)
# Heading text fixed: the original string ended in a stray ')'.
print('Test dataset performance of the best models:')
print(tabulate(results, headers='keys', numalign="right", floatfmt=".3f"))

Test dataset performance of the best models):
         Diff. MAE    Diff. MSE    ENTSOE MAE    ENTSOE MSE    Mean absolute error    Mean squared error  Model name
-----  -----------  -----------  ------------  ------------  ---------------------  --------------------  ------------
3.000     -270.405  -363825.236       643.805    577753.116                373.400            213927.880  model6
1.000     -261.627  -348436.499       643.805    577753.116                382.177            229316.617  model3
0.000     -259.917  -345673.155       643.805    577753.116                383.888            232079.961  model2
2.000     -253.170  -334141.730       643.805    577753.116                390.635            243611.386  model5
