# Compare Forecasts

In [2]:
import os
import datetime
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from keras.models import load_model
from keras import backend as K

import pytz
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

from lstm_load_forecasting import data, lstm

from ast import literal_eval

%matplotlib

Using TensorFlow backend.


Using matplotlib backend: TkAgg


# TBATS Benchmark

In [3]:
# Load the pre-computed TBATS benchmark forecast and index it hourly from `starting`.
tbats_fc = pd.read_csv(os.path.join('Data', 'tbats_forecast_01022017-h5000.csv'))

# First forecast timestamp (UTC).
starting = datetime.datetime(2017,2,1,0,0,0,0, tzinfo=pytz.utc )
# Size the index from the file itself rather than hard-coding 5000 rows, so a
# forecast file with a different horizon does not fail with a length mismatch.
forecasts = pd.DataFrame(
    data={"tbats_forecast": tbats_fc['tbats_fc'].values},
    index=pd.date_range(starting, periods=len(tbats_fc), freq='60min'),
)

# Actual Load and ENTSOE Benchmark

In [4]:
# Load the ENTSOE forecast and the actual load from the full dataset and
# attach both to the benchmark frame (index-aligned join).
path = os.path.join('Data', 'fulldataset.csv')
entsoe, actual = (data.load_dataset(path=path, modules=[module]) for module in ('entsoe', 'actual'))
forecasts = forecasts.join(entsoe).join(actual)

# LSTM Models

In [5]:
# Shared settings for the LSTM comparison.
# The best model of each notebook is looked up in the stored test results
# whose file names carry the run date below.
starting = datetime.datetime(2017, 2, 1, tzinfo=pytz.utc)  # first timestamp of the test period (UTC)
date = '20170508'
model_dir = os.path.abspath('models/')
res_path = os.path.abspath('results/')

### LSTM Model 6 (All available data)

In [6]:
# Model 6: load every feature module and drop rows with missing values.
df6 = data.load_dataset(path=path, modules=['all'])
df6_scaled = df6.copy().dropna()

# Standardise all float64 columns. The target 'actual' is expected to be among
# them, since the predictions are un-scaled with this scaler further down.
# NOTE(review): the scaler is fitted before the train/test split below, so
# test-period statistics leak into the scaling - confirm this is intended.
floats = [col for col, dtype in df6_scaled.dtypes.items() if dtype == 'float64']
scaler = StandardScaler()
scaled_columns = scaler.fit_transform(df6_scaled[floats])
df6_scaled[floats] = scaled_columns

# Chronological split at `starting`: everything before is training data.
df6_train = df6_scaled.loc[(df6_scaled.index < starting)].copy()
df6_test = df6_scaled.loc[df6_scaled.index >= starting].copy()
y_train = df6_train['actual'].copy()
# `axis` is passed by keyword: the positional form is rejected by pandas >= 2.0.
X_train = df6_train.drop('actual', axis=1).copy()
y_test = df6_test['actual'].copy()
X_test = df6_test.drop('actual', axis=1).copy()

valid_results_6 = pd.read_csv(os.path.join(res_path, 'notebook_06/', str('model6_results_' + date + '.csv')), delimiter=';')
test_results_6 = pd.read_csv(os.path.join(res_path, 'notebook_06/', str('model6_test_results' + date + '.csv')), delimiter=';')
test_results_6 = test_results_6.sort_values('Mean absolute error', ascending=True)
# Positional lookup: after sorting, the best (lowest-MAE) model is the first
# row. `.loc[0]` would return the row *labelled* 0, i.e. whatever happened to be
# first in the CSV, regardless of the sort.
best_model_6 = test_results_6.iloc[0]['Model name']

# Look up the hyper-parameters the best model was validated with.
config = valid_results_6.loc[valid_results_6['model_name'] == best_model_6]
batch_size = int(config['batch_train'].values[0])
# Number of complete batches that fit into the test set.
size = y_test.shape[0] // batch_size

# The stored config is a Python-literal string; only its 'layers' entry is needed.
layers = literal_eval(config['config'].values[0])['layers']


In [7]:
# Build model 6 from the best configuration and fit it on the training split
# (20% of it held out for validation).
# `batch_size` is the plain int parsed from the config; the original passed
# `config['batch_train'].values` (a one-element ndarray) to create_model while
# using the int everywhere else.
model6 = lstm.create_model(
    layers=layers,
    sample_size=X_train.shape[0],
    batch_size=batch_size,
    timesteps=1,
    features=X_train.shape[1],
    loss='mse',
    optimizer='adam',
)
history = lstm.train_model(model=model6, mode='fit', y=y_train, X=X_train,
                           batch_size=batch_size, timesteps=1, epochs=25,
                           rearrange=False, validation_split=0.2, verbose=1)

Warnining: Division "sample_size/batch_size" not a natural number.
Dropped the last 1 of 18241 number of obs.
Effective validation split now is: 0.200
Train on 14592 samples, validate on 3648 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 00011: early stopping


In [8]:
# Predict on whole batches only: the test features are trimmed to the first
# size * batch_size rows. Predictions are still in standardised units.
scaled_predictions = lstm.get_predictions(
    model=model6,
    X=X_test.iloc[:size * batch_size],
    batch_size=batch_size,
    timesteps=1,
    verbose=1,
)



In [9]:
# Undo the standardisation of the target so the predictions are in the same
# units as the 'actual' column.
# The target's position is looked up among the scaled columns instead of
# assuming it is column 0 of the scaler.
target_pos = floats.index('actual')
mu = scaler.mean_[target_pos]
sigma = scaler.scale_[target_pos]

mod6_predictions = mu + sigma*scaled_predictions
# NOTE(review): the index is rebuilt as a gap-free hourly range from `starting`;
# this matches the test rows only if dropna() removed no test-period rows - confirm.
df_mod6 = pd.DataFrame(
    data={"model6": mod6_predictions.flatten()},
    index=pd.date_range(starting, periods=mod6_predictions.shape[0], freq='60min'),
)
# Drop a stale column first so the cell can be re-run without a join collision.
# `axis` is passed by keyword: the positional form is rejected by pandas >= 2.0.
if 'model6' in forecasts.columns:
    forecasts = forecasts.drop('model6', axis=1)
forecasts = forecasts.join(df_mod6)

In [10]:
# Clear the Keras backend session and reset TensorFlow's default graph before
# the next model is built.
# NOTE(review): `tf.reset_default_graph` is the TensorFlow 1.x spelling; under
# TensorFlow 2.x it lives at `tf.compat.v1.reset_default_graph` - confirm the
# target version. The import also sits mid-notebook rather than in the import cell.
K.clear_session()
import tensorflow as tf
tf.reset_default_graph()

### LSTM Model 3 (Calendar only)

In [11]:
# Model 3: load the actual load plus calendar features and drop rows with missing values.
df3 = data.load_dataset(path=path, modules=['actual', 'calendar'])
df3_scaled = df3.copy().dropna()

# Standardise all float64 columns. The target 'actual' is expected to be among
# them, since the predictions are un-scaled with this scaler further down.
# NOTE(review): the scaler is fitted before the train/test split below, so
# test-period statistics leak into the scaling - confirm this is intended.
floats = [col for col, dtype in df3_scaled.dtypes.items() if dtype == 'float64']
scaler = StandardScaler()
scaled_columns = scaler.fit_transform(df3_scaled[floats])
df3_scaled[floats] = scaled_columns

# Chronological split at `starting`: everything before is training data.
df3_train = df3_scaled.loc[(df3_scaled.index < starting)].copy()
df3_test = df3_scaled.loc[df3_scaled.index >= starting].copy()
y_train = df3_train['actual'].copy()
# `axis` is passed by keyword: the positional form is rejected by pandas >= 2.0.
X_train = df3_train.drop('actual', axis=1).copy()
y_test = df3_test['actual'].copy()
X_test = df3_test.drop('actual', axis=1).copy()

valid_results_3 = pd.read_csv(os.path.join(res_path, 'notebook_03/', str('model3_results_' + date + '.csv')), delimiter=';')
test_results_3 = pd.read_csv(os.path.join(res_path, 'notebook_03/', str('model3_test_results' + date + '.csv')), delimiter=';')
test_results_3 = test_results_3.sort_values('Mean absolute error', ascending=True)
# Positional lookup: after sorting, the best (lowest-MAE) model is the first
# row. `.loc[0]` would return the row *labelled* 0, i.e. whatever happened to be
# first in the CSV, regardless of the sort.
best_model_3 = test_results_3.iloc[0]['Model name']

# Look up the hyper-parameters the best model was validated with.
config = valid_results_3.loc[valid_results_3['model_name'] == best_model_3]
batch_size = int(config['batch_train'].values[0])
# Number of complete batches that fit into the test set.
size = y_test.shape[0] // batch_size

# The stored config is a Python-literal string; only its 'layers' entry is needed.
layers = literal_eval(config['config'].values[0])['layers']

In [12]:
# Build model 3 from the best configuration and fit it on the training split
# (20% of it held out for validation).
# `batch_size` is the plain int parsed from the config; the original passed
# `config['batch_train'].values` (a one-element ndarray) to create_model while
# using the int everywhere else.
model3 = lstm.create_model(
    layers=layers,
    sample_size=X_train.shape[0],
    batch_size=batch_size,
    timesteps=1,
    features=X_train.shape[1],
    loss='mse',
    optimizer='adam',
)
history = lstm.train_model(model=model3, mode='fit', y=y_train, X=X_train,
                           batch_size=batch_size, timesteps=1, epochs=25,
                           rearrange=False, validation_split=0.2, verbose=1)

Warnining: Division "sample_size/batch_size" not a natural number.
Dropped the last 1 of 18241 number of obs.
Effective validation split now is: 0.200
Train on 14592 samples, validate on 3648 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 00011: early stopping


In [13]:
# Predict on whole batches only: the test features are trimmed to the first
# size * batch_size rows. Predictions are still in standardised units.
scaled_predictions = lstm.get_predictions(
    model=model3,
    X=X_test.iloc[:size * batch_size],
    batch_size=batch_size,
    timesteps=1,
    verbose=1,
)



In [14]:
# Undo the standardisation of the target so the predictions are in the same
# units as the 'actual' column.
# The target's position is looked up among the scaled columns instead of
# assuming it is column 0 of the scaler.
target_pos = floats.index('actual')
mu = scaler.mean_[target_pos]
sigma = scaler.scale_[target_pos]

mod3_predictions = mu + sigma*scaled_predictions
# NOTE(review): the index is rebuilt as a gap-free hourly range from `starting`;
# this matches the test rows only if dropna() removed no test-period rows - confirm.
df_mod3 = pd.DataFrame(
    data={"model3": mod3_predictions.flatten()},
    index=pd.date_range(starting, periods=mod3_predictions.shape[0], freq='60min'),
)
# Drop a stale column first so the cell can be re-run without a join collision.
# `axis` is passed by keyword: the positional form is rejected by pandas >= 2.0.
if 'model3' in forecasts.columns:
    forecasts = forecasts.drop('model3', axis=1)
forecasts = forecasts.join(df_mod3)

# Table with Results

In [16]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values. Entries equal to zero make the result inf/nan,
        because each error is divided by the corresponding observed value.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    numpy.float64
        ``mean(|y_true - y_pred| / |y_true|) * 100``.
    """
    # Coerce to arrays so plain lists work and values are compared position by
    # position, the same way the sklearn metrics used alongside this helper do.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Score only the timestamps for which no column of `forecasts` is missing.
forecasts = forecasts.dropna()

# Table column label -> column of `forecasts` holding that forecast.
forecast_columns = [
    ('tbats', 'tbats_forecast'),
    ('entsoe', 'entsoe'),
    ('m6-all', 'model6'),
    ('m3-calendar', 'model3'),
]
# Metric functions, in the same order as the row labels below.
metrics = (mean_squared_error, mean_absolute_error, mean_absolute_percentage_error)

results = {'': ['MSE', 'MAE', 'MAPE']}
for label, column in forecast_columns:
    results[label] = [metric(forecasts['actual'], forecasts[column]) for metric in metrics]

# Emit the comparison as a LaTeX booktabs table.
print(tabulate(results, headers='keys', numalign="right", tablefmt='latex_booktabs', floatfmt=".1f"))

\begin{tabular}{lrrrr}
\toprule
      &     tbats &   entsoe &   m6-all &   m3-calendar \\
\midrule
 MSE  & 1014545.8 & 439829.7 & 195661.9 &      244965.0 \\
 MAE  &     852.4 &    530.6 &    357.7 &         402.7 \\
 MAPE &      13.2 &      7.9 &      5.3 &           6.1 \\
\bottomrule
\end{tabular}


In [1]:
# Plot every forecast against the actual load over the test period.
# This cell needs the import cell and all forecast cells above to have been run
# in the same kernel session.
plt.figure()
# (column in `forecasts`, legend label) - drawn in this order.
series_to_plot = (
    ('entsoe', 'ENTSOE Forecast'),
    ('actual', 'Actual Load'),
    ('tbats_forecast', 'TBATS Forecast'),
    ('model6', 'Model 6 (All)'),
    ('model3', 'Model 3 (Calendar)'),
)
for column, label in series_to_plot:
    plt.plot(forecasts.index, forecasts[column], label=label)
plt.title('Forecast Comparison: Test Data')
plt.ylabel('Electricity load (in MW)')
plt.xlabel('Date')
plt.legend(loc='upper left')
# The original ended with a bare `plt.show`, which only references the function
# and never calls it.
plt.show()

NameError: name 'plt' is not defined