# Model Category 3: Using the weather features only
The third model category will only use weather features to create a forecast for the electricity load.

## Model category specific configuration
These parameters are model category specific


In [1]:
# Model category name used throughout the subsequent analysis
model_cat_id = "03"

# Which features from the dataset should be loaded:
# ['all', 'actual', 'entsoe', 'weather_t', 'weather_i', 'holiday', 'weekday', 'hour', 'month']
features = ['actual', 'weather']

# LSTM Layer configuration
# ========================
# Stateful True or false
layer_conf = [ True, True, True ]
# Number of neurons per layer

# cells = [[ 5, 10, 20, 30, 50, 75, 100, 125, 150 ], [0, 10, 20, 50], [0, 10, 15, 20]]
cells = [[ 1, 5, 10, 15 ], [0, 5, 10], [0, 5, 10]]
# Regularization per layer
dropout = [0, 0.1, 0.2]
# Size of how many samples are used for one forward/backward pass
batch_size = [8]
# In a sense this is the output neuron dimension, or how many timesteps the neuron should output. Currently not implemented, defaults to 1.
timesteps = [1]

### Module imports

In [3]:
import os
import sys
import math
import itertools
import datetime as dt
import pytz
import time as t
import numpy as np
import pandas as pd
from pandas import read_csv
from pandas import datetime
from numpy import newaxis

import matplotlib as mpl

import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.tsa import stattools
from tabulate import tabulate

import math
import keras as keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LSTM
from keras.callbacks import TensorBoard
from keras.utils import np_utils
from keras.models import load_model

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

from IPython.display import HTML
from IPython.display import display
%matplotlib notebook
mpl.rcParams['figure.figsize'] = (9,5)

# Import custom module functions
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
    sys.path.append(module_path)

from lstm_load_forecasting import data, lstm

## Overall configuration
These parameters are later used, but shouldn't have to change between different model categories (model 1-5)

In [4]:
# Directory with dataset
path = os.path.join(os.path.abspath(''), 'E:/seminar project/input file for lstm neural netwrok/myData.csv')
# path = os.path.join(os.path.abspath(''), 'E:/seminar project/input file for lstm neural netwrok/fulldataset.csv')
# Splitdate for train and test data. As the TBATS and ARIMA benchmark needs 2 full cycle of all seasonality, needs to be after jan 01. 
# loc_tz = pytz.timezone('Europe/Zurich')
loc_tz = pytz.timezone('America/New_York')
split_date = loc_tz.localize(dt.datetime(2004,10,1,0,0,0,0))
# split_date = loc_tz.localize(dt.datetime(2017,2,1,0,0,0,0))
print(loc_tz)
# Validation split percentage

validation_split = 0.2
# How many epochs in total
epochs = 50
# Set verbosity level. 0 for only per model, 1 for progress bar...
verbose = 0

# Dataframe containing the relevant data from training of all models
results = pd.DataFrame(columns=['model_name', 'config', 'dropout',
                                'train_loss', 'train_rmse', 'train_mae', 'train_mape', 
                                'valid_loss', 'valid_rmse', 'valid_mae', 'valid_mape', 
                                'test_rmse', 'test_mae', 'test_mape',
                                'epochs', 'batch_train', 'input_shape',
                                'total_time', 'time_step', 'splits'
                               ])
# Early stopping parameters
early_stopping = True
min_delta = 0.006
patience = 2

America/New_York


## Preparation and model generation
Necessary preliminary steps and then the generation of all possible models based on the settings at the top of this notebook.

In [5]:
# Generate output folders and files
res_dir = 'E:/seminar project/lstm-load-forecasting/lstm-load-forecasting-master/results/notebook_' + model_cat_id + '/'
plot_dir = 'E:/seminar project/lstm-load-forecasting/lstm-load-forecasting-master/plots/notebook_' + model_cat_id + '/'
model_dir = 'E:/seminar project/lstm-load-forecasting/lstm-load-forecasting-master/models/notebook_' + model_cat_id + '/'
os.makedirs(res_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)
output_table = res_dir + model_cat_id + '_results_' + t.strftime("%Y%m%d") + '.csv'
test_output_table = res_dir + model_cat_id + '_test_results' + t.strftime("%Y%m%d") + '.csv'

# Generate model combinations
models = []
models = lstm.generate_combinations(
    model_name=model_cat_id + '_', layer_conf=layer_conf, cells=cells, dropout=dropout, 
    batch_size=batch_size, timesteps=[1])

| Number of model configs generated | 108 |


## Loading the data:


In [12]:
# Load data and prepare for standardization
#1st method
# df = data.load_dataset(path=path, modules=features)
# datatrain=df.loc[(df.index < split_date )].copy()
# datatest=df.loc[(df.index >= split_date)].copy()
# datatrain=datatrain.copy()
# datatrain=datatrain.dropna()
# datatest=datatest.copy()
# datatest=datatest.dropna()
# Xtrain = datatrain.drop('demand_load', 1).copy()
# ytrain = datatrain.drop('temperature', 1).copy()
# Xtest=datatest.drop('demand_load', 1).copy()
# ytest=datatest.drop('temperature', 1).copy()
# floatstrainx = [key for key in dict(Xtrain.dtypes) if dict(Xtrain.dtypes)[key] in ['float64']]
# floatstrainy = [key for key in dict(ytrain.dtypes) if dict(ytrain.dtypes)[key] in ['float64']]
# floatstestx = [key for key in dict(Xtest.dtypes) if dict(Xtest.dtypes)[key] in ['float64']]
# floatstesty = [key for key in dict(ytest.dtypes) if dict(ytest.dtypes)[key] in ['float64']]
# scalerx=StandardScaler()
# X_train=scalerx.fit_transform(Xtrain[floatstrainx])
# X_test=scalerx.fit_transform(Xtest[floatstestx])
# scalery=StandardScaler()
# y_train=scalery.fit_transform(ytrain[floatstrainy])
# y_test=scalery.fit_transform(ytest[floatstesty])
# print(y_train)

#second method
df = data.load_dataset(path=path, modules=features)
data_scaled=df.copy()
data_scaled=df.dropna()
X_data=data_scaled.drop('demand_load',1).copy()
y_data=data_scaled.drop('temperature',1).copy()
floatsX=[key for key in dict(X_data.dtypes) if dict(X_data.dtypes)[key] in ['float64']]
floatsy=[key for key in dict(y_data.dtypes) if dict(y_data.dtypes)[key] in ['float64']]
scalerx=StandardScaler()
Xscaled=scalerx.fit_transform(X_data[floatsX])
X_data[floatsX]=Xscaled
scalery=StandardScaler()
yscaled=scalery.fit_transform(y_data[floatsy])
y_data[floatsy]=yscaled
y_train=y_data.loc[(y_data.index < split_date )].copy()
X_train=X_data.loc[(X_data.index < split_date )].copy()
y_test=y_data.loc[(y_data.index >= split_date )].copy()
X_test=X_data.loc[(X_data.index >= split_date )].copy()




#third method
# df = data.load_dataset(path=path, modules=features)
# df_scaled = df.copy()
# df_scaled = df_scaled.dropna()
# # Get all float type columns and standardize them
# floats = [key for key in dict(df_scaled.dtypes) if dict(df_scaled.dtypes)[key] in ['float64']]
# scaler = StandardScaler()
# scaled_columns = scaler.fit_transform(df_scaled[floats])
# df_scaled[floats] = scaled_columns
# 
# # Split in train and test dataset
# df_train = df_scaled.loc[(df_scaled.index < split_date )].copy()
# df_test = df_scaled.loc[df_scaled.index >= split_date].copy()
# # Split in features and label data
# # y_train = df_train['actual'].copy()
# # X_train = df_train.drop('actual', 1).copy()
# # y_test = df_test['actual'].copy()
# # X_test = df_test.drop('actual', 1).copy()
# #y_train = df_train['demand_load'].copy()
# X_train = df_train.drop('demand_load', 1).copy()
# y_train = df_train.drop('temperature', 1).copy()
# # y_test = df_test['demand_load'].copy()
# X_test = df_test.drop('demand_load', 1).copy()
# y_test = df_test.drop('temperature', 1).copy()


## Running through all generated models
Note: Depending on the above settings, this can take very long!

In [13]:
start_time = t.time()
for idx, m in enumerate(models):
    stopper = t.time()
    print('========================= Model {}/{} ========================='.format(idx+1, len(models)))
    print(tabulate([['Starting with model', m['name']], ['Starting time', datetime.fromtimestamp(stopper)]],
                   tablefmt="jira", numalign="right", floatfmt=".3f"))
    try:
        # Creating the Keras Model
        model = lstm.create_model(layers=m['layers'], sample_size=X_train.shape[0], batch_size=m['batch_size'], 
                          timesteps=m['timesteps'], features=X_train.shape[1])
        # Training...
        history = lstm.train_model(model=model, mode='fit', y=y_train, X=X_train, 
                                   batch_size=m['batch_size'], timesteps=m['timesteps'], epochs=epochs, 
                                   rearrange=False, validation_split=validation_split, verbose=verbose, 
                                   early_stopping=early_stopping, min_delta=min_delta, patience=patience)

        # Write results
        min_loss = np.min(history.history['val_loss'])
        min_idx = np.argmin(history.history['val_loss'])
        min_epoch = min_idx + 1
        
        if verbose > 0:
            print('______________________________________________________________________')
            print(tabulate([['Minimum validation loss at epoch', min_epoch, 'Time: {}'.format(t.time()-stopper)],
                        ['Training loss & MAE', history.history['loss'][min_idx], history.history['mean_absolute_error'][min_idx]  ], 
                        ['Validation loss & mae', history.history['val_loss'][min_idx], history.history['val_mean_absolute_error'][min_idx] ],
                       ], tablefmt="jira", numalign="right", floatfmt=".3f"))
            print('______________________________________________________________________')
        
        
        result = [{'model_name': m['name'], 'config': m, 'train_loss': history.history['loss'][min_idx], 'train_rmse': 0,
                   'train_mae': history.history['mean_absolute_error'][min_idx], 'train_mape': 0,
                   'valid_loss': history.history['val_loss'][min_idx], 'valid_rmse': 0, 
                   'valid_mae': history.history['val_mean_absolute_error'][min_idx],'valid_mape': 0, 
                   'test_rmse': 0, 'test_mae': 0, 'test_mape': 0, 'epochs': '{}/{}'.format(min_epoch, epochs), 'batch_train':m['batch_size'],
                   'input_shape':(X_train.shape[0], timesteps, X_train.shape[1]), 'total_time':t.time()-stopper, 
                   'time_step':0, 'splits':str(split_date), 'dropout': m['layers'][0]['dropout']
                  }]
        results = results.append(result, ignore_index=True)
        
        # Saving the model and weights
        model.save(model_dir + m['name'] + '.h5')
        
        # Write results to csv
        results.to_csv(output_table, sep=';')
        
        K.clear_session()
        import tensorflow as tf
        tf.reset_default_graph()
        
    # Shouldn't catch all errors, but for now...
    except BaseException as e:
        print('=============== ERROR {}/{} ============='.format(idx+1, len(models)))
        print(tabulate([['Model:', m['name']], ['Config:', m]], tablefmt="jira", numalign="right", floatfmt=".3f"))
        print('Error: {}'.format(e))
        result = [{'model_name': m['name'], 'config': m, 'train_loss': str(e)}]
        results = results.append(result, ignore_index=True)
        results.to_csv(output_table,sep=';')
        continue
        

| Starting with model | 03_1_l-1                   |
| Starting time       | 2018-06-26 19:57:21.217601 |


| Starting with model | 03_2_l-1_d-0.1             |
| Starting time       | 2018-06-26 19:57:32.597806 |


| Starting with model | 03_3_l-1_d-0.2             |
| Starting time       | 2018-06-26 19:57:43.032159 |


| Starting with model | 03_4_l-1_l-5               |
| Starting time       | 2018-06-26 19:57:53.491540 |


| Starting with model | 03_5_l-1_l-5_d-0.1         |
| Starting time       | 2018-06-26 19:58:05.342210 |


| Starting with model | 03_6_l-1_l-5_d-0.2         |
| Starting time       | 2018-06-26 19:58:17.641187 |


| Starting with model | 03_7_l-1_l-10              |
| Starting time       | 2018-06-26 19:58:30.004274 |


| Starting with model | 03_8_l-1_l-10_d-0.1        |
| Starting time       | 2018-06-26 19:58:41.603702 |


| Starting with model | 03_9_l-1_l-10_d-0.2        |
| Starting time       | 2018-06-26 19:58:54.101879 |


| Starting with model | 03_10_l-1_l-5              |
| Starting time       | 2018-06-26 19:59:06.567086 |


| Starting with model | 03_11_l-1_l-5_d-0.1        |
| Starting time       | 2018-06-26 19:59:18.252530 |


| Starting with model | 03_12_l-1_l-5_d-0.2        |
| Starting time       | 2018-06-26 19:59:30.847855 |


| Starting with model | 03_13_l-1_l-5_l-5          |
| Starting time       | 2018-06-26 19:59:43.560220 |


| Starting with model | 03_14_l-1_l-5_l-5_d-0.1    |
| Starting time       | 2018-06-26 19:59:59.841818 |


| Starting with model | 03_15_l-1_l-5_l-5_d-0.2    |
| Starting time       | 2018-06-26 20:00:17.286491 |


| Starting with model | 03_16_l-1_l-5_l-10         |
| Starting time       | 2018-06-26 20:00:34.563983 |


| Starting with model | 03_17_l-1_l-5_l-10_d-0.1   |
| Starting time       | 2018-06-26 20:00:54.270652 |


| Starting with model | 03_18_l-1_l-5_l-10_d-0.2   |
| Starting time       | 2018-06-26 20:01:11.789357 |


| Starting with model | 03_19_l-1_l-10             |
| Starting time       | 2018-06-26 20:01:29.628352 |


| Starting with model | 03_20_l-1_l-10_d-0.1       |
| Starting time       | 2018-06-26 20:01:41.836264 |


| Starting with model | 03_21_l-1_l-10_d-0.2       |
| Starting time       | 2018-06-26 20:01:54.467623 |


| Starting with model | 03_22_l-1_l-10_l-5         |
| Starting time       | 2018-06-26 20:02:06.974803 |


| Starting with model | 03_23_l-1_l-10_l-5_d-0.1   |
| Starting time       | 2018-06-26 20:02:23.225375 |


| Starting with model | 03_24_l-1_l-10_l-5_d-0.2   |
| Starting time       | 2018-06-26 20:02:40.632980 |


| Starting with model | 03_25_l-1_l-10_l-10        |
| Starting time       | 2018-06-26 20:02:58.342859 |


| Starting with model | 03_26_l-1_l-10_l-10_d-0.1  |
| Starting time       | 2018-06-26 20:03:14.609445 |


| Starting with model | 03_27_l-1_l-10_l-10_d-0.2  |
| Starting time       | 2018-06-26 20:03:35.216919 |


| Starting with model | 03_28_l-5                  |
| Starting time       | 2018-06-26 20:03:56.327847 |


| Starting with model | 03_29_l-5_d-0.1            |
| Starting time       | 2018-06-26 20:04:03.594375 |


| Starting with model | 03_30_l-5_d-0.2            |
| Starting time       | 2018-06-26 20:04:10.906918 |


| Starting with model | 03_31_l-5_l-5              |
| Starting time       | 2018-06-26 20:04:18.608837 |


| Starting with model | 03_32_l-5_l-5_d-0.1        |
| Starting time       | 2018-06-26 20:04:30.097126 |


| Starting with model | 03_33_l-5_l-5_d-0.2        |
| Starting time       | 2018-06-26 20:04:42.519264 |


| Starting with model | 03_34_l-5_l-10             |
| Starting time       | 2018-06-26 20:04:55.008459 |


| Starting with model | 03_35_l-5_l-10_d-0.1       |
| Starting time       | 2018-06-26 20:05:06.807040 |


| Starting with model | 03_36_l-5_l-10_d-0.2       |
| Starting time       | 2018-06-26 20:05:19.054018 |


| Starting with model | 03_37_l-5_l-5              |
| Starting time       | 2018-06-26 20:05:33.220764 |


| Starting with model | 03_38_l-5_l-5_d-0.1        |
| Starting time       | 2018-06-26 20:05:47.317390 |


| Starting with model | 03_39_l-5_l-5_d-0.2        |
| Starting time       | 2018-06-26 20:05:59.924661 |


| Starting with model | 03_40_l-5_l-5_l-5          |
| Starting time       | 2018-06-26 20:06:12.596022 |


| Starting with model | 03_41_l-5_l-5_l-5_d-0.1    |
| Starting time       | 2018-06-26 20:06:29.911552 |


| Starting with model | 03_42_l-5_l-5_l-5_d-0.2    |
| Starting time       | 2018-06-26 20:06:50.708193 |


| Starting with model | 03_43_l-5_l-5_l-10         |
| Starting time       | 2018-06-26 20:07:08.164858 |


| Starting with model | 03_44_l-5_l-5_l-10_d-0.1   |
| Starting time       | 2018-06-26 20:07:27.390131 |


| Starting with model | 03_45_l-5_l-5_l-10_d-0.2   |
| Starting time       | 2018-06-26 20:07:53.931892 |


| Starting with model | 03_46_l-5_l-10             |
| Starting time       | 2018-06-26 20:08:21.038187 |


| Starting with model | 03_47_l-5_l-10_d-0.1       |
| Starting time       | 2018-06-26 20:08:33.845678 |


| Starting with model | 03_48_l-5_l-10_d-0.2       |
| Starting time       | 2018-06-26 20:08:46.138716 |


| Starting with model | 03_49_l-5_l-10_l-5         |
| Starting time       | 2018-06-26 20:08:58.768025 |


| Starting with model | 03_50_l-5_l-10_l-5_d-0.1   |
| Starting time       | 2018-06-26 20:09:16.619062 |


| Starting with model | 03_51_l-5_l-10_l-5_d-0.2   |
| Starting time       | 2018-06-26 20:09:38.284439 |


| Starting with model | 03_52_l-5_l-10_l-10        |
| Starting time       | 2018-06-26 20:09:55.978305 |


| Starting with model | 03_53_l-5_l-10_l-10_d-0.1  |
| Starting time       | 2018-06-26 20:10:15.703994 |


| Starting with model | 03_54_l-5_l-10_l-10_d-0.2  |
| Starting time       | 2018-06-26 20:10:36.601727 |


| Starting with model | 03_55_l-10                 |
| Starting time       | 2018-06-26 20:10:57.693641 |


| Starting with model | 03_56_l-10_d-0.1           |
| Starting time       | 2018-06-26 20:11:06.141241 |


| Starting with model | 03_57_l-10_d-0.2           |
| Starting time       | 2018-06-26 20:11:13.584886 |


| Starting with model | 03_58_l-10_l-5             |
| Starting time       | 2018-06-26 20:11:21.338835 |


| Starting with model | 03_59_l-10_l-5_d-0.1       |
| Starting time       | 2018-06-26 20:11:33.104418 |


| Starting with model | 03_60_l-10_l-5_d-0.2       |
| Starting time       | 2018-06-26 20:11:47.685463 |


| Starting with model | 03_61_l-10_l-10            |
| Starting time       | 2018-06-26 20:12:03.243409 |


| Starting with model | 03_62_l-10_l-10_d-0.1      |
| Starting time       | 2018-06-26 20:12:15.064007 |


| Starting with model | 03_63_l-10_l-10_d-0.2      |
| Starting time       | 2018-06-26 20:12:29.137623 |


| Starting with model | 03_64_l-10_l-5             |
| Starting time       | 2018-06-26 20:12:46.747427 |


| Starting with model | 03_65_l-10_l-5_d-0.1       |
| Starting time       | 2018-06-26 20:12:58.792223 |


| Starting with model | 03_66_l-10_l-5_d-0.2       |
| Starting time       | 2018-06-26 20:13:11.316496 |


| Starting with model | 03_67_l-10_l-5_l-5         |
| Starting time       | 2018-06-26 20:13:30.077262 |


| Starting with model | 03_68_l-10_l-5_l-5_d-0.1   |
| Starting time       | 2018-06-26 20:13:46.405902 |


| Starting with model | 03_69_l-10_l-5_l-5_d-0.2   |
| Starting time       | 2018-06-26 20:14:07.499813 |


| Starting with model | 03_70_l-10_l-5_l-10        |
| Starting time       | 2018-06-26 20:14:28.137315 |


| Starting with model | 03_71_l-10_l-5_l-10_d-0.1  |
| Starting time       | 2018-06-26 20:14:47.005232 |


| Starting with model | 03_72_l-10_l-5_l-10_d-0.2  |
| Starting time       | 2018-06-26 20:15:08.930893 |


| Starting with model | 03_73_l-10_l-10            |
| Starting time       | 2018-06-26 20:15:30.015843 |


| Starting with model | 03_74_l-10_l-10_d-0.1      |
| Starting time       | 2018-06-26 20:15:46.700756 |


| Starting with model | 03_75_l-10_l-10_d-0.2      |
| Starting time       | 2018-06-26 20:15:59.096903 |


| Starting with model | 03_76_l-10_l-10_l-5        |
| Starting time       | 2018-06-26 20:16:11.951426 |


| Starting with model | 03_77_l-10_l-10_l-5_d-0.1  |
| Starting time       | 2018-06-26 20:16:34.502658 |


| Starting with model | 03_78_l-10_l-10_l-5_d-0.2  |
| Starting time       | 2018-06-26 20:16:54.412463 |


| Starting with model | 03_79_l-10_l-10_l-10       |
| Starting time       | 2018-06-26 20:17:16.260104 |


| Starting with model | 03_80_l-10_l-10_l-10_d-0.1 |
| Starting time       | 2018-06-26 20:17:35.376193 |


| Starting with model | 03_81_l-10_l-10_l-10_d-0.2 |
| Starting time       | 2018-06-26 20:17:57.023600 |


| Starting with model | 03_82_l-15                 |
| Starting time       | 2018-06-26 20:18:17.690128 |


| Starting with model | 03_83_l-15_d-0.1           |
| Starting time       | 2018-06-26 20:18:25.320970 |


| Starting with model | 03_84_l-15_d-0.2           |
| Starting time       | 2018-06-26 20:18:33.212047 |


| Starting with model | 03_85_l-15_l-5             |
| Starting time       | 2018-06-26 20:18:41.098148 |


| Starting with model | 03_86_l-15_l-5_d-0.1       |
| Starting time       | 2018-06-26 20:18:55.254858 |


| Starting with model | 03_87_l-15_l-5_d-0.2       |
| Starting time       | 2018-06-26 20:19:17.424685 |


| Starting with model | 03_88_l-15_l-10            |
| Starting time       | 2018-06-26 20:19:29.991985 |


| Starting with model | 03_89_l-15_l-10_d-0.1      |
| Starting time       | 2018-06-26 20:19:46.126424 |


| Starting with model | 03_90_l-15_l-10_d-0.2      |
| Starting time       | 2018-06-26 20:20:00.153027 |


| Starting with model | 03_91_l-15_l-5             |
| Starting time       | 2018-06-26 20:20:13.026537 |


| Starting with model | 03_92_l-15_l-5_d-0.1       |
| Starting time       | 2018-06-26 20:20:30.635324 |


| Starting with model | 03_93_l-15_l-5_d-0.2       |
| Starting time       | 2018-06-26 20:20:43.606986 |


| Starting with model | 03_94_l-15_l-5_l-5         |
| Starting time       | 2018-06-26 20:20:58.069970 |


| Starting with model | 03_95_l-15_l-5_l-5_d-0.1   |
| Starting time       | 2018-06-26 20:21:20.379957 |


| Starting with model | 03_96_l-15_l-5_l-5_d-0.2   |
| Starting time       | 2018-06-26 20:21:40.909381 |


| Starting with model | 03_97_l-15_l-5_l-10        |
| Starting time       | 2018-06-26 20:22:02.556772 |


| Starting with model | 03_98_l-15_l-5_l-10_d-0.1  |
| Starting time       | 2018-06-26 20:22:22.565680 |


| Starting with model | 03_99_l-15_l-5_l-10_d-0.2  |
| Starting time       | 2018-06-26 20:22:43.695640 |


| Starting with model | 03_100_l-15_l-10           |
| Starting time       | 2018-06-26 20:23:05.941616 |


| Starting with model | 03_101_l-15_l-10_d-0.1     |
| Starting time       | 2018-06-26 20:23:20.406550 |


| Starting with model | 03_102_l-15_l-10_d-0.2     |
| Starting time       | 2018-06-26 20:23:33.231036 |


| Starting with model | 03_103_l-15_l-10_l-5       |
| Starting time       | 2018-06-26 20:23:46.597025 |


| Starting with model | 03_104_l-15_l-10_l-5_d-0.1 |
| Starting time       | 2018-06-26 20:24:11.556398 |


| Starting with model | 03_105_l-15_l-10_l-5_d-0.2 |
| Starting time       | 2018-06-26 20:24:41.632365 |


| Starting with model | 03_106_l-15_l-10_l-10      |
| Starting time       | 2018-06-26 20:25:09.111001 |


| Starting with model | 03_107_l-15_l-10_l-10_d-0.1 |
| Starting time       | 2018-06-26 20:25:35.884006  |


| Starting with model | 03_108_l-15_l-10_l-10_d-0.2 |
| Starting time       | 2018-06-26 20:26:00.153767  |


## Model selection based on the validation MAE

Select the top 5 models based on the Mean Absolute Error in the validation data:
http://scikit-learn.org/stable/modules/model_evaluation.html#mean-absolute-error

In [14]:
# Number of the selected top models 
selection = 5
# If run in the same instance not necessary. If run on the same day, then just use output_table
results_fn = res_dir + model_cat_id + '_results_' + '20180614' + '.csv'



results_csv = pd.read_csv(results_fn, delimiter=';', encoding='latin1')




top_models = results_csv.nsmallest(selection, 'valid_mae')
print(top_models)

     Unnamed: 0         model_name  \
372         372  03_373_l-125_l-50   
96           96         03_97_l-20   
297         297  03_298_l-100_l-20   
345         345  03_346_l-125_l-20   
36           36     03_37_l-5_l-50   

                                                config  dropout  \
372  {'name': '03_373_l-125_l-50', 'layers': [{'typ...      0.0   
96   {'name': '03_97_l-20', 'layers': [{'type': 'ls...      0.0   
297  {'name': '03_298_l-100_l-20', 'layers': [{'typ...      0.0   
345  {'name': '03_346_l-125_l-20', 'layers': [{'typ...      0.0   
36   {'name': '03_37_l-5_l-50', 'layers': [{'type':...      0.0   

             train_loss  train_rmse  train_mae  train_mape  valid_loss  \
372   0.634248168572874         0.0   0.657725         0.0    0.542658   
96   0.6236581323458884         0.0   0.649591         0.0    0.526598   
297  0.6233532765602812         0.0   0.648314         0.0    0.520918   
345  0.6273993025944317         0.0   0.651957         0.0    0.530819  

## Evaluate top 5 models

In [17]:
# Init test results table

# Init test results table
test_results = pd.DataFrame(columns=['Model name', 'Mean absolute error', 'Mean squared error'])

# Init empty predictions
predictions = {}

# Loop through models
for index, row in top_models.iterrows():
    filename = model_dir + row['model_name'] + '.h5'
    # model=h5py.File(filename)
    
    model = load_model(filename)
    
    batch_size = int(row['batch_train'])
    # print(filename)
    # Calculate scores
    loss, mae = lstm.evaluate_model(model=model, X=X_test, y=y_test, batch_size=batch_size, timesteps=1, verbose=verbose)

    # Store results
    result = [{'Model name': row['model_name'], 
               'Mean squared error': loss, 'Mean absolute error': mae
              }]
    test_results = test_results.append(result, ignore_index=True)

    # Generate predictions
    model.reset_states()
    model_predictions = lstm.get_predictions(model=model, X=y_test, batch_size=batch_size, timesteps=timesteps[0], verbose=verbose)
    
        # Save predictions
    predictions[row['model_name']] = model_predictions

    K.clear_session()
    import tensorflow as tf
    tf.reset_default_graph()


test_results = test_results.sort_values('Mean absolute error', ascending=True)
test_results = test_results.set_index(['Model name'])

if not os.path.isfile(test_output_table):
    test_results.to_csv(test_output_table, sep=';')
else: # else it exists so append without writing the header
    test_results.to_csv(test_output_table,mode = 'a',header=False, sep=';')

In [22]:
print('Test dataset performance of the best {} (out of {} tested models):'.format(min(selection, len(models)), len(models)))
print(tabulate(test_results, headers='keys', tablefmt="grid", numalign="right", floatfmt=".3f"))
restored_1=scalery.inverse_transform(model_predictions)
for i in range(len(restored_1)):
    print(restored_1[i])
restored_2=scalery.inverse_transform(y_test)
for i in range(len(restored_2)):
            print(restored_2[i])



Test dataset performance of the best 5 (out of 108 tested models):


+-------------------+-----------------------+----------------------+
| Model name        |   Mean absolute error |   Mean squared error |
| 03_298_l-100_l-20 |                 0.889 |                1.035 |
+-------------------+-----------------------+----------------------+
| 03_373_l-125_l-50 |                 0.912 |                1.146 |
+-------------------+-----------------------+----------------------+
| 03_37_l-5_l-50    |                 0.915 |                1.164 |
+-------------------+-----------------------+----------------------+
| 03_346_l-125_l-20 |                 0.955 |                1.320 |
+-------------------+-----------------------+----------------------+
| 03_97_l-20        |                 0.959 |                1.319 |
+-------------------+-----------------------+----------------------+


In [32]:


history.history['mean_absolute_error']
history.history['val_mean_absolute_error']
history.history['loss']
history.history['val_loss']

[1.1310293587242686, 1.1311681201545203, 1.1311185490794298]

In [None]:
# plots=lstm.plot_history(history=history, metrics='mean_absolute_error', interactive=False, display=False)