In [None]:
import os
import re
import sys
import glob
import pickle
import tables
import datetime

import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from comet_ml.api import API, APIExperiment
from comet_ml.query import Tag

if not '..' in sys.path:
    sys.path.append('..')
from train_LSTM import *

In [None]:
# Connection to Comet: the API key is read from the environment, not hard-coded.
api = API(api_key = os.environ['COMET_API_KEY'])
workspace = 'danielelinaro'
project_name = 'load-forecasting'

# Values used below to build the tags that select the experiments.
future = 1    # [hours]
history = 24  # [hours]
hours_ahead = np.r_[0 : 24]

# Set n_neurons and/or n_layers to None to leave them out of the query.
n_neurons = 20
n_layers = 3
with_building_temperature = False
random_initial_weights = True
training_data_files = ['data1']

# Tags shared by all the experiments of interest; the "ahead" tag is added per hour below.
base_query = Tag('LSTM') & Tag('_'.join(training_data_files)) & \
    Tag(f'future={future}') & Tag(f'history={history}')
if n_layers is not None:
    base_query &= Tag(f'{n_layers}_layers')
if n_neurons is not None:
    base_query &= Tag(f'{n_neurons}_neurons')
if with_building_temperature:
    base_query &= Tag('building_temperature')
if random_initial_weights:
    base_query &= Tag('random_initial_weights')
else:
    base_query &= Tag('initialized_weights')

# For each value of "hours ahead", keep the experiment with the lowest validation loss.
experiments_path = '../experiments/LSTM/'
experiment_IDs = []
val_losses = []
for hours in hours_ahead:
    sys.stdout.write(f'{hours:2.0f} hours ahead ')
    sys.stdout.flush()
    query = base_query & Tag(f'ahead={hours:.1f}')
    experiments = api.query(workspace, project_name, query, archived=False)
    if not with_building_temperature:
        # the query above does not exclude experiments that carry the
        # 'building_temperature' tag, so they are removed here
        experiments = [expt for expt in experiments
                       if 'building_temperature' not in expt.get_tags()]
    sys.stdout.write(f'({len(experiments)} expts): ')

    # find the experiment with the smallest minimum of the 'val_loss' metric
    best_experiment = None
    min_val_loss = np.inf
    for experiment in experiments:
        metrics = experiment.get_metrics_summary()
        losses = [float(m['valueMin']) for m in metrics if m['name'] == 'val_loss']
        if not losses:
            # no validation loss was logged for this experiment: it cannot be ranked
            continue
        if losses[0] < min_val_loss:
            min_val_loss = losses[0]
            best_experiment = experiment

    if best_experiment is None:
        # without this check the values of the previous iteration would be silently reused
        raise RuntimeError(f'No experiment with a val_loss metric found for {hours} hours ahead')

    ID = best_experiment.id
    tags = best_experiment.get_tags()
    # NOTE: pickle can execute arbitrary code, only load files produced by trusted runs
    with open(experiments_path + ID + '/history.pkl', 'rb') as fid:
        training_history = pickle.load(fid)
    val_loss = training_history['val_loss']
    experiment_IDs.append(ID)
    val_losses.append(val_loss)

    # the size of the network is recovered from tags of the form '<N>_neurons' and '<N>_layers';
    # dedicated names are used so that the configuration values defined above are not overwritten
    best_n_neurons = [int(tag.split('_')[0]) for tag in tags if 'neurons' in tag][0]
    best_n_layers = [int(tag.split('_')[0]) for tag in tags if 'layers' in tag][0]
    n_epochs = len(val_loss)
    print(f'best expt is {ID[:9]} ({best_n_layers} layers, {best_n_neurons} neurons, {n_epochs:3d} epochs), ' + 
          f'validation loss: {min_val_loss:.4f}')

In [None]:
# Load, for each selected experiment, the checkpoint saved at the epoch with the
# lowest validation loss.
n_experiments = len(experiment_IDs)
models = []
for i,experiment_ID in enumerate(experiment_IDs):
    checkpoint_path = experiments_path + experiment_ID + '/checkpoints/'
    checkpoint_files = glob.glob(checkpoint_path + '*.h5')
    # the epoch number is parsed from the file name: second dot-separated field, up to
    # the first '-' (assumes names like '<prefix>.<epoch>-<...>.h5' -- TODO confirm)
    epochs = [int(os.path.split(file)[-1].split('.')[1].split('-')[0])
              for file in checkpoint_files]
    # np.argmin is 0-based, hence the +1 to match the epoch number in the file name
    best_epoch = np.argmin(val_losses[i]) + 1
    best_checkpoint = checkpoint_files[epochs.index(best_epoch)]
    models.append(keras.models.load_model(best_checkpoint, compile=True))

# The parameters of the last experiment are used for all models
# (presumably all experiments share the same data file and normalization -- verify).
with open(experiments_path + experiment_IDs[-1] + '/parameters.pkl', 'rb') as fid:
    parameters = pickle.load(fid)
# we need min and max of the training set to normalize the data
training_set_min = parameters['training_set_min']
training_set_max = parameters['training_set_max']
data_file = '../' + parameters['data_file']
time_step = parameters['time_step']

In [None]:
# Load the full data set; the file is closed as soon as it has been read.
# NOTE: pickle can execute arbitrary code, only load trusted data files.
with open(data_file, 'rb') as fid:
    data_full = pickle.load(fid)['full']
data = data_full['building_energy'].copy()
if 'building_consumption' in parameters['inputs']['continuous']:
    # the model inputs use the 'building_' prefix: rename the columns accordingly
    data = data.rename({key: 'building_' + key for key in ('consumption','generation')}, axis='columns')
data['building_temperature'] = data_full['building_sensor']['temperature'].copy()

# Pre-processing with the helpers imported from train_LSTM.
orig_time_step = extract_time_step(data)
data = add_minute_and_workday(data)
data = average_data(data, time_step, orig_time_step, parameters['inputs']['continuous'])
n_days, samples_per_day = compute_stats(data, time_step)
# time axis of one day, in hours (time_step is in minutes)
t = np.arange(samples_per_day) * time_step / 60

print(f'Time step: {time_step} minutes.')
print(f'Number of days: {n_days}.')
print(f'Samples per day: {samples_per_day}.')

In [None]:
# Number of days in each subset: training and test are fractions of the total,
# the validation set gets whatever is left.
split_fractions = parameters['data_split']
n_days_training = int(split_fractions['training'] * n_days)
n_days_test = int(split_fractions['test'] * n_days)
n_days_validation = n_days - (n_days_training + n_days_test)

# Row indices in the data set at which the training and the validation sets end.
train_split = samples_per_day * n_days_training
validation_split = samples_per_day * (n_days_training + n_days_validation)

In [None]:
# Names of the continuous input columns: the '_averaged' variants are used when
# the experiment was trained on averaged inputs.
continuous_inputs = parameters['inputs']['continuous']
if parameters['average_continuous_inputs']:
    cols = [name + '_averaged' for name in continuous_inputs]
else:
    cols = continuous_inputs

# Build the input matrix with the helper imported from train_LSTM.
categorical_inputs = parameters['inputs']['categorical']
X = make_dataset(data, cols, categorical_inputs,
                 training_set_max, training_set_min, n_days, samples_per_day)
print(f'The input matrix has {X.shape[0]} rows and {X.shape[1]} columns.')

In [None]:
def fun(y, M, m):
    """Map `y` linearly from [-1, 1] back to [m, M] and multiply the result by 1e-3.

    NOTE(review): the 1e-3 factor is presumably a W -> kW conversion -- confirm.
    """
    return (m + (M - m) / 2 * (y + 1)) * 1e-3

# Prediction errors, one value per day of the test set.
mae = np.zeros(n_days_test)
mape = np.zeros(n_days_test)
for day in range(n_days_test):
    # "today" is the day before the one being predicted: for the first
    # test day this is the last day preceding `validation_split`
    start = validation_split + (day - 1) * samples_per_day
    stop = start + samples_per_day
    today_measured_scaled = tf.constant(X[start : stop, :][np.newaxis, :, :], dtype=tf.float32)
    tomorrow_measured_scaled = X[stop : stop + samples_per_day, 0]
    # the outputs of all the models are chained one after the other
    predictions = [model.predict(today_measured_scaled) for model in models]
    tomorrow_predicted_scaled = np.concatenate(predictions).flatten()
    # undo the normalization using the extrema of the first column of the training set
    tomorrow_measured = fun(tomorrow_measured_scaled, training_set_max[0], training_set_min[0])
    tomorrow_predicted = fun(tomorrow_predicted_scaled, training_set_max[0], training_set_min[0])
    mae[day] = tf.keras.losses.MAE(tomorrow_measured, tomorrow_predicted).numpy()
    mape[day] = tf.keras.losses.MAPE(tomorrow_measured, tomorrow_predicted).numpy()
    print(f'Day {day+1:2d}: MAE = {mae[day]:6.4f} MAPE = {mape[day]:4.1f}%')

In [None]:
# Save the errors to a file whose name tells whether the building temperature was used.
if with_building_temperature:
    output_file = 'errors_with_building_temperature.pkl'
else:
    output_file = 'errors_without_building_temperature.pkl'
# the context manager guarantees that the data are flushed and the file is closed
with open(output_file, 'wb') as fid:
    pickle.dump({'mae': mae, 'mape': mape}, fid)

In [None]:
# Distribution of the MAE over the test days: box plot (left) and violin plot (right).
fig,ax = plt.subplots(1, 2, sharey=True, sharex=True, figsize=(4,3))
box_ax, violin_ax = ax
box_ax.boxplot(mae, notch=True)
violin_ax.violinplot(mae)
# hide the top and right borders of both panels
for side in 'top','right':
    for a in ax:
        a.spines[side].set_visible(False)
box_ax.set_ylabel('MAE')
fig.tight_layout()
# the file name tells whether the building temperature was among the inputs
errors_figure_file = ('errors_with_building_temperature.pdf' if with_building_temperature
                      else 'errors_without_building_temperature.pdf')
fig.savefig(errors_figure_file)

In [None]:
# One panel every third day, starting from the first day after the validation set.
# The last usable day is n_days - 2, because each panel also shows the following day
# (the upper bound was hard-coded to 364, which is only correct for a 365-day data set).
offsets = range(n_days_training + n_days_validation, n_days - 1, 3)
n_offsets = len(offsets)
if n_offsets == 0:
    raise ValueError('There are no days to plot after the validation set')
# squeeze=False always returns a 2D array of axes, even when there is a single panel
fig,ax = plt.subplots(n_offsets, 1, figsize=(6, n_offsets * 2), sharex=True, squeeze=False)
ax = ax[:, 0]
# the legend goes in the panel whose "today" trace has the lowest peak
max_today = np.inf
j = 0
for i,offset in enumerate(offsets):
    start = offset * samples_per_day
    stop = (offset + 1) * samples_per_day
    today = tf.constant(X[start : stop, :][np.newaxis, :, :], dtype=tf.float32)
    # the outputs of all the models are chained one after the other
    tomorrow = np.ndarray.flatten(np.concatenate([model.predict(today) for model in models]))
    
    # undo the normalization using the extrema of the first column of the training set
    today_actual = fun(today[0,:,0], training_set_max[0], training_set_min[0])
    tomorrow_predicted = fun(tomorrow, training_set_max[0], training_set_min[0])
    tomorrow_actual = fun(X[start + samples_per_day : stop + samples_per_day, 0],
                         training_set_max[0], training_set_min[0])
    
    if np.max(today_actual) < max_today:
        max_today = np.max(today_actual)
        j = i
    # "today" is drawn at negative times, "tomorrow" at positive ones
    ax[i].plot(t - 24, today_actual, color=[0,.5,1], lw=1, label='Real today')
    ax[i].plot(t, tomorrow_actual, color=[.3,.3,.3], lw=1, label='Real tomorrow')
    ax[i].plot(t, tomorrow_predicted, color=[1,.5,0], lw=2, label='Predicted tomorrow')
ax[j].legend(loc='upper left')
for a in ax:
    a.set_ylabel('Consumption [kW]')
    a.set_ylim([1.5, 8.5])
    a.set_yticks(np.r_[2 : 9])
    a.grid(True, which='major', axis='y', color=[.8,.8,.8], linestyle='-', linewidth=0.5)
    for side in 'top','right':
        a.spines[side].set_visible(False)
ax[-1].set_xlabel('Time [hours]')
ax[-1].set_xticks(np.r_[-24 : 25 : 6])
fig.tight_layout()
if with_building_temperature:
    fig.savefig('24_hour_forecast_with_building_temperature.pdf')
else:
    fig.savefig('24_hour_forecast_without_building_temperature.pdf')