In [1]:
# Notebook for prediction and evaluation of multi-step forecasting ARTRFDC models

In [2]:
import os
import json
import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# uncomment the following line for compatibility with TensorFlow 1.15 (on GCP)
# import tensorflow.compat.v1 as tf
# uncomment the following line for TensorFlow 2.X (local execution)
import tensorflow as tf

# forecast model was saved in TensorFlow 1.15
# but, in order to make predictions locally, has to be loaded with TensorFlow 2
from tensorflow.saved_model import load

In [3]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
# select a palette
from bokeh.palettes import d3
output_notebook()

In [4]:
# symmetrical mean absolute percentage error
def smape(targets, predictions):
    '''
    Symmetrical mean absolute percentage error (SMAPE).

    Computed as sum(2 * |prediction - target| / (|target| + |prediction|)) / n,
    where n is the size of the first axis of the inputs.

    targets: a list (or array, or scalar) with the actual values
    predictions: a list (or array, or scalar) with the predicted values,
                 with the same shape as targets

    returns: the SMAPE as a float; NaN when the inputs are empty
    raises: ValueError when targets and predictions do not have the same shape
    '''
    # lists (or scalars) to float NumPy arrays with at least one dimension
    targets = np.atleast_1d(np.asarray(targets, dtype=float))
    predictions = np.atleast_1d(np.asarray(predictions, dtype=float))

    # fail loudly on a shape mismatch instead of silently returning None
    if predictions.shape != targets.shape:
        raise ValueError(
            'targets and predictions must have the same shape, got {} and {}'.format(
                targets.shape, predictions.shape))

    # there is nothing to average on empty inputs
    if predictions.shape[0] == 0:
        return float('nan')

    numerator = 2.0 * np.abs(predictions - targets)
    denominator = np.abs(targets) + np.abs(predictions)

    # when both the target and the prediction are zero the prediction is exact,
    # so that term contributes 0 to the sum (instead of the NaN produced by 0/0)
    ratios = np.divide(numerator,
                       denominator,
                       out=np.zeros_like(numerator),
                       where=denominator != 0)

    return np.sum(ratios) / predictions.shape[0]

In [5]:
def _parse_dataset_function(example_proto, objective_shapes, parse_timestamp):
    """
    Parse one serialized tf.Example into a (features, labels) pair of dictionaries.

    example_proto: the serialized example, parsed with the module-level read_features schema
    objective_shapes: a dictionary with the shapes (lists) used to reshape the flat
                      'source_target' values and, when requested, the 'timestamp' values
    parse_timestamp: when true, the timestamps are also returned in both dictionaries

    returns: features_dict with key 'source' (and optionally 'timestamp'),
             labels_dict with key 'target' (and optionally 'timestamp')
    """
    parsed = tf.io.parse_single_example(example_proto, read_features)

    # the flat values are reshaped to the objective shape, then split in two views:
    # the source drops the last entry and the target drops the first one
    sequence = tf.reshape(parsed['source_target'].values, objective_shapes['source_target'])
    features_dict = {'source': sequence[:-1]}
    labels_dict = {'target': sequence[1:]}

    # do not parse the timestamp for training: strings are not supported in TPUs
    # (parse it as a number instead, if it is required there)
    if not parse_timestamp:
        return features_dict, labels_dict

    # timestamps follow the same split used for the values
    stamps = tf.reshape(parsed['timestamp'].values, objective_shapes['timestamp'])
    features_dict['timestamp'] = stamps[:-1]
    labels_dict['timestamp'] = stamps[1:]

    return features_dict, labels_dict

In [6]:
# a function to encode float values for serialized examples
def _float_feature_from_list_of_values(list_of_values):
    """Wraps a list of floats / doubles in a tf.train.Feature that holds a FloatList."""
    float_list = tf.train.FloatList(value=list_of_values)
    return tf.train.Feature(float_list=float_list)

In [7]:
def tensor_to_tensor_example(float_tensor):
    """
    Serialize a float tensor as a tf.train.Example and return it as a scalar string tensor.

    float_tensor: an eager tensor of floats (its .numpy() value is flattened)

    returns: a scalar string tensor holding the serialized example, whose only
             feature is 'source' (a float list with the flattened values)

    NOTE(review): the recorded run of the prediction loop in this notebook fails inside the
    serving function with "Input to reshape is a tensor with 0 values"; presumably the
    exported model parses a feature other than 'source' - confirm against its serving input.
    """
    # first, pass the float tensor to NumPy array, then flatten it
    flat_array = float_tensor.numpy().flatten()

    # second, build the protobuffer example with a single float-list feature
    example = tf.train.Example(
        features=tf.train.Features(
            feature={'source': _float_feature_from_list_of_values(flat_array)}
        )
    )

    # third, serialize the example to a bytes string
    serialized_example = example.SerializeToString()

    # fourth, wrap the bytes directly as a scalar string tensor;
    # a round trip through a fixed-width NumPy bytes array (dtype='S') is avoided because
    # NumPy drops trailing NUL bytes when reading such items back, which would truncate
    # a serialized example that ends in zero bytes (e.g. when the last float is 0.0)
    return tf.constant(serialized_example, dtype=tf.string)

In [8]:
# schema used by _parse_dataset_function to parse each serialized example:
# both features are read as variable-length (sparse) values and reshaped afterwards
read_features = {
    'source_target': tf.io.VarLenFeature(dtype=tf.float32),
    'timestamp': tf.io.VarLenFeature(dtype=tf.string)
}

In [9]:
PROJECT_ROOT = '/home/developer/gcp/cbidmltsf'

# during batch prediction, the SLDB identifier is obtained via Abseil Flags
sldb_id = 'CPE04115_H_kw_20210526212214_ARTRFDC_168'

In [10]:
# directory of the selected SLDB: <PROJECT_ROOT>/sldbs/<sldb_id>
sldb_path_parts = (PROJECT_ROOT, 'sldbs', sldb_id)
data_dir = '{}/{}/{}'.format(*sldb_path_parts)

# the json file in the SLDB directory, read later to get the ts_identifier
sldb_json_template = '{}/sldb.json'
sldb_json_file = sldb_json_template.format(data_dir)

In [11]:
# read the SLDB json file into a dictionary
with open(sldb_json_file, 'r') as sldb_file:
    sldb_dict = json.loads(sldb_file.read())

In [12]:
# and get the time series identifier
ts_identifier = sldb_dict['ts']

In [13]:
# the SK-Learn scaler used to normalize the test dataset (unseen) is stored
# under timeseries/<ts_identifier>/, so the time series identifier locates it
scaler_test_path = '{}/{}/{}/scaler_test.save'.format(PROJECT_ROOT,
                                                      'timeseries',
                                                      ts_identifier)
scaler_test = joblib.load(scaler_test_path)

scaler_loaded_message = 'Scaler on test dataset loaded for time series {}'
print(scaler_loaded_message.format(ts_identifier))

Scaler on test dataset loaded for time series CPE04115_H_kw_20210526212214


In [14]:
# pass all the code to a single notebook cell, then to a function, later...

In [15]:
# during batch prediction, the model identifier is obtained via Abseil Flags
model_id = 'ARTRFDC_TPU_000'

In [16]:
# during batch prediction, the dataset name is obtained via Abseil Flags
dataset = 'test'

In [17]:
# during batch prediction, the execution identifier is obtained via Abseil Flags
execution = 9

In [18]:
# use model identifier and execution number to build the model directory string
model_dir = '{}_{:02d}'.format(model_id, execution)

In [19]:
# get the path to the saved model main directory
saved_model_path = '{}/{}/{}/export/exporter'.format(PROJECT_ROOT,
                                                     'models',
                                                     model_dir)

In [20]:
# get all the entries in the saved model path, to find the most recent one
all_files = os.listdir(saved_model_path)

# exports are stored in directories with all-digit names (presumably the export timestamp);
# any other entry (e.g. temporary or hidden files) is ignored so it cannot be selected
export_ids = [name for name in all_files if name.isdigit()]
if not export_ids:
    raise FileNotFoundError('No exported model found in {}'.format(saved_model_path))

# get the identifier of the most recent saved model,
# comparing the names as numbers rather than lexicographically
latest_saved_model_id = max(export_ids, key=int)

In [21]:
# build the full path for the latest saved model dir
export_dir = '{}/{}'.format(saved_model_path, latest_saved_model_id)
print ('Exported model path is {}'.format(export_dir))

Exported model path is /home/developer/gcp/cbidmltsf/models/ARTRFDC_TPU_000_09/export/exporter/1622212478


In [57]:
# let's review the saved model, the expert way
! saved_model_cli show --dir /home/developer/gcp/cbidmltsf/models/ARTRFDC_TPU_000_09/export/exporter/1622212478 \
--tag_set serve --signature_def serving_default

2021-05-31 17:21:29.353487: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-05-31 17:21:29.353519: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
The given SavedModel SignatureDef contains the following input(s):
  inputs['example_bytes'] tensor_info:
      dtype: DT_STRING
      shape: ()
      name: Placeholder:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['forecast'] tensor_info:
      dtype: DT_FLOAT
      shape: (0, 169, 1)
      name: time_distributed/Reshape_1:0
Method name is: tensorflow/serving/predict


In [58]:
# and compare the previous output with the one of a fully functional serving instance
! saved_model_cli show --dir /home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_000_09/export/exporter/1615317687 \
--tag_set serve --signature_def serving_default

2021-05-31 17:24:42.828943: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-05-31 17:24:42.828974: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
The given SavedModel SignatureDef contains the following input(s):
  inputs['example_bytes'] tensor_info:
      dtype: DT_STRING
      shape: ()
      name: Placeholder:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['forecast'] tensor_info:
      dtype: DT_FLOAT
      shape: (1, 24, 1)
      name: time_distributed_3/Reshape_1:0
Method name is: tensorflow/serving/predict


In [None]:
# which makes evident that the serving signature of the ARTRFDC model is wrong!
# therefore break the previous compact format of source_target tensor and get back to source, target

In [22]:
# load the saved model (tag set 'serve') from the latest export directory
imported = load(export_dir=export_dir, tags='serve')
# the prediction function is the 'serving_default' signature of the loaded model;
# it is called with a serialized example and its result is read through the 'forecast' key
predict_fn = imported.signatures["serving_default"]

In [23]:
predict_fn

<ConcreteFunction pruned(example_bytes) at 0x7F0758125A10>

In [24]:
# the TFRecord file for prediction lives in the SLDB directory and is named after the dataset
dataset_path_template = '{}/{}.tfrecord'
dataset_path = dataset_path_template.format(data_dir, dataset)

# open it as a dataset of serialized examples
tfrecord_dataset = tf.data.TFRecordDataset(dataset_path)

In [25]:
dataset_path

'/home/developer/gcp/cbidmltsf/sldbs/CPE04115_H_kw_20210526212214_ARTRFDC_168/test.tfrecord'

In [26]:
# a list to temporarily store the rows in the dataset
tfrecord_list = list()

In [27]:
# store every serialized example of the dataset in the temporary list
tfrecord_list.extend(tfrecord_dataset)

In [28]:
len(tfrecord_list)

2040

In [30]:
tfrecord_list[0]

<tf.Tensor: shape=(), dtype=string, numpy=b"\n\x88A\n\xee\x1b\n\ttimestamp\x12\xe0\x1b\n\xdd\x1b\n\x132018-05-01 00:00:00\n\x132018-05-01 01:00:00\n\x132018-05-01 02:00:00\n\x132018-05-01 03:00:00\n\x132018-05-01 04:00:00\n\x132018-05-01 05:00:00\n\x132018-05-01 06:00:00\n\x132018-05-01 07:00:00\n\x132018-05-01 08:00:00\n\x132018-05-01 09:00:00\n\x132018-05-01 10:00:00\n\x132018-05-01 11:00:00\n\x132018-05-01 12:00:00\n\x132018-05-01 13:00:00\n\x132018-05-01 14:00:00\n\x132018-05-01 15:00:00\n\x132018-05-01 16:00:00\n\x132018-05-01 17:00:00\n\x132018-05-01 18:00:00\n\x132018-05-01 19:00:00\n\x132018-05-01 20:00:00\n\x132018-05-01 21:00:00\n\x132018-05-01 22:00:00\n\x132018-05-01 23:00:00\n\x132018-05-02 00:00:00\n\x132018-05-02 01:00:00\n\x132018-05-02 02:00:00\n\x132018-05-02 03:00:00\n\x132018-05-02 04:00:00\n\x132018-05-02 05:00:00\n\x132018-05-02 06:00:00\n\x132018-05-02 07:00:00\n\x132018-05-02 08:00:00\n\x132018-05-02 09:00:00\n\x132018-05-02 10:00:00\n\x132018-05-02 11:00:00\n\x

In [31]:
# get the SLDB parameters for the forecasting model
config_json_file = '{}/{}/{}.json'.format(PROJECT_ROOT,
                                          'parameters',
                                          model_id)

In [32]:
# read the model configuration dictionary from the json file in parameters/
with open(config_json_file, 'r') as config_file:
    configuration = json.loads(config_file.read())

In [33]:
# objective shapes for reshaping the parsed tensors, stored in a dictionary;
# sequences carry one timestep more than num_timesteps
extended_timesteps = configuration['num_timesteps'] + 1

_EXTRACTING_OBJECTIVE_SHAPES = dict(
    source_target=[extended_timesteps, configuration['model_dimension']],
    timestamp=[extended_timesteps, 1]
)

_EXTRACTING_OBJECTIVE_SHAPES

{'source_target': [169, 7], 'timestamp': [169, 1]}

In [28]:
# from now on, inferences for ARTRFDC are produced in a very different way
# from the one used for DMSLSTM or EDALSTM models (the prediction process has to be iterative)

In [29]:
# each predicted row is a sequence of n_timesteps values,
# but only the first element in this sequence is used, as the first prediction,
# then it is added (along with its positional encodings) to the end of the input sequence
# (first entry of the input sequence is discarded to keep tensor shape)
# to get the second prediction, and so on up to the n_timesteps-th prediction,
# which completes the n_timesteps prediction sequence (the forecast window)
# that starts immediately after the source input sequence ends (in time dimension)

In [30]:
# the iterative process for inference over the ARTRFDC saved model can be initiated now:
# source feature (?, 168, 7) (unseen data) is on parsed_dataset[0]['source']
# target feature (?, 168, 7) (unseen data) is on parsed_dataset[1]

# uncomment and run the following two cells to confirm that

In [31]:
# it is not possible to iterate over a segment of a dataset, as required by iterative inference,
# so the complete test dataset will be passed to two NumPy arrays:

# source_array, with shape (n_rows, n_timesteps, n_features), in this example (2040, 168, 7), and
# target_array, with shape (n_rows, n_timesteps, n_features), in this example (2040, 168, 7)

# remember that each target row is the source row that follows it, that is,
# source_array[1:, :, :] == target_array[:-1, :, :]

In [49]:
# parse the serialized examples into (features, labels) pairs, timestamps included;
# no other visible cell builds parsed_dataset, so it is built here to keep the cell runnable
parsed_dataset = tfrecord_dataset.map(
    lambda example_proto: _parse_dataset_function(
        example_proto,
        objective_shapes=_EXTRACTING_OBJECTIVE_SHAPES,
        parse_timestamp=True))


def _timestamps_to_datetime(timestamp_tensor):
    """Converts a tensor of timestamps from bytes to strings to flat array to datetime."""
    return pd.to_datetime(timestamp_tensor.numpy().astype(str).squeeze())


# build temporary lists to store source (features) and target (labels) values
source_list = list()
source_timestamp_list = list()

target_list = list()
target_timestamp_list = list()

# fill in the lists from the parsed dataset
for features, labels in parsed_dataset:
    # values are stored as NumPy arrays, which is what the final arrays are built from
    source_list.append(features['source'].numpy())
    source_timestamp_list.append(_timestamps_to_datetime(features['timestamp']))

    target_list.append(labels['target'].numpy())
    target_timestamp_list.append(_timestamps_to_datetime(labels['timestamp']))

# from the temporary lists, build NumPy arrays to feed the model
source_array = np.array(source_list)
source_timestamp_array = np.array(source_timestamp_list)

target_array = np.array(target_list)
target_timestamp_array = np.array(target_timestamp_list)

In [50]:
# verify shape of resulting arrays
source_array.shape, source_timestamp_array.shape, target_array.shape, target_timestamp_array.shape

((2040, 168, 7), (2040, 168), (2040, 168, 7), (2040, 168))

In [34]:
# now follow the inference process detailed in Klingenbrunn to:
# predict over the forecast window,
# calculate prediction error metrics, and
# plot prediction results

In [35]:
# define a forecast window to guide the iterative prediction process
# start with a hourly, day-ahead process
forecast_window = 24

In [36]:
# index of the last row in the source array from which a forecast-window sized set of
# predictions can be built (the row num_timesteps positions ahead of it must exist)
rows_in_source = len(source_list)
max_row_index = rows_in_source - (configuration['num_timesteps'] + 1)

for message in (
        'Given the current dataset and the number of timesteps in the input sequence,',
        'predictions can be made between row 0 and row {} of the source array.'.format(max_row_index)):
    print(message)

Given the current dataset and the number of timesteps in the input sequence,
predictions can be made between row 0 and row 1871 of the source array.


In [37]:
# the first source or input to the model is the first source row
# that means, the true variable value, plus the six positional encodings for the timestamp
# in the first row of the test dataset

In [38]:
# uncomment the following line to get the source as a tensor with TensorShape([1, 168, 7])
# source_tensor = tf.expand_dims(source_array[0, :, :], axis=0)

In [39]:
# important, the inference cycle was coded for tensors, not for NumPy arrays
# then use source and prediction tensors and translate to tensor examples from float tensors

In [40]:
# a columns list for the predictions detail dataframe
pred_df_columns = ['model_id', 'execution', 'dataset', 'string_timestamps', 'predictions', 'targets']
# error metrics calculated over the forecast window of each row, stored after the columns above
metric_columns = ['mae', 'rmse', 'smape']

# one record (dictionary) per source row; the dataframe is built once, after the loop,
# instead of being grown with pd.concat on every iteration
prediction_records = list()

# number of timesteps in the input sequence of the model
num_timesteps = configuration['num_timesteps']

# NOTE(review): the recorded run of this cell fails inside predict_fn with
# "Input to reshape is a tensor with 0 values"; presumably the exported serving function
# expects a different input than the example built here - confirm before relying on results

for row in range(max_row_index + 1):

    # build the initial source tensor, adding a batch dimension of size 1
    source_tensor = tf.expand_dims(source_array[row, :, :], axis=0)

    # and build the prediction tensor: a forecast-window-sized tensor formed with the
    # true values (and positional encodings) that start at the end of the source tensor,
    # taken from the source row located num_timesteps rows ahead
    prediction_tensor = tf.expand_dims(
        source_array[row + num_timesteps, :forecast_window, :],
        axis=0)

    # remember source_array is (?, timesteps, features), but source_timestamp_array is (?, timesteps) only
    prediction_timestamps = source_timestamp_array[row + num_timesteps, :forecast_window]

    # the first input to the model is the source tensor itself
    next_input_model = source_tensor

    # predicted values for the forecast window of the current row
    predictions_list = list()

    # fill the predictions list over the forecast window
    for i in range(forecast_window):

        # from the current input tensor, get the 'forecast' output as a NumPy array
        prediction = predict_fn(tensor_to_tensor_example(next_input_model))['forecast'].numpy()

        # get the value of the most recent prediction (last timestep) into the predictions list
        predictions_list.append(prediction[:, -1, :][0][0])

        # from the source tensor, get the positional encodings after dropping the first i+1 entries
        pos_encoding_old_values = source_tensor[:, i+1:, 1:]

        # from the prediction tensor, get the positional encodings of its first i+1 entries
        pos_encoding_new_val = prediction_tensor[:, :i+1, 1:]

        # build the positional encodings for the next input (same length as the source)
        pos_encodings = tf.concat([pos_encoding_old_values, pos_encoding_new_val], axis=1)
        pos_encodings = tf.cast(pos_encodings, dtype=tf.float32)

        # build the values feature for the next input to the model:
        # drop the first i+1 values of the source...
        value_feature_old_values = tf.expand_dims(source_tensor[:, i+1:, 0], axis=-1)
        value_feature_old_values = tf.cast(value_feature_old_values, dtype=tf.float32)

        # ...and append the i+1 predictions made so far, shaped as (1, i+1, 1)
        value_feature_new_values = tf.cast(
            tf.convert_to_tensor(np.array(predictions_list)),
            dtype=tf.float32)
        value_feature_new_values = tf.expand_dims(value_feature_new_values, axis=-1)
        value_feature_new_values = tf.expand_dims(value_feature_new_values, axis=0)

        # build the value feature tensor
        next_input_model = tf.concat([value_feature_old_values, value_feature_new_values], axis=1)

        # build the next input tensor for the model
        next_input_model = tf.concat([next_input_model, pos_encodings], axis=2)

    # iterative predictions over the forecast window reside in predictions_list
    # convert list to array, then expand feature dimension with value 1
    predicted_values = np.array(predictions_list).reshape(-1, 1)

    # inverse-scale predictions with the scaler loaded for the test dataset (scaler_test)
    rescaled_predicted_values = scaler_test.inverse_transform(predicted_values)

    # the true values remain in the prediction tensor, pass them to a NumPy array
    # and expand feature dimension with value 1
    true_values = prediction_tensor[0, :, 0].numpy().reshape(-1, 1)

    # inverse-scale true values
    rescaled_true_values = scaler_test.inverse_transform(true_values)

    # a record built from the data in the current row;
    # ravel (instead of squeeze) keeps the values as lists even for a one-step forecast window
    prediction_records.append({
        'model_id': model_id,
        'execution': execution,
        'dataset': dataset,
        'string_timestamps': pd.to_datetime(prediction_timestamps).astype(str).tolist(),
        'predictions': np.ravel(rescaled_predicted_values).tolist(),
        'targets': np.ravel(rescaled_true_values).tolist(),
        'mae': mean_absolute_error(rescaled_true_values, rescaled_predicted_values),
        'rmse': sqrt(mean_squared_error(rescaled_true_values, rescaled_predicted_values)),
        'smape': smape(rescaled_true_values, rescaled_predicted_values)
    })

# build the predictions detail dataframe (one row per record, default integer index)
predictions_detail_df = pd.DataFrame(prediction_records, columns=pred_df_columns + metric_columns)

InvalidArgumentError:  Input to reshape is a tensor with 0 values, but the requested shape has 1183
	 [[node Reshape (defined at <ipython-input-22-fad56071c367>:2) ]] [Op:__inference_pruned_1423]

Function call stack:
pruned


In [164]:
# build a predictions summary dataframe with the mean of the scalar error metrics
# for each (model_id, execution, dataset) group;
# the metric columns are selected explicitly because the list-valued columns
# (string_timestamps, predictions, targets) cannot be averaged,
# and the index is reset to avoid making a multi-column index when grouping by
summary_metric_columns = ['mae', 'rmse', 'smape']

predictions_summary_df = (
    predictions_detail_df
    .astype({column: float for column in summary_metric_columns})
    .groupby(['model_id', 'execution', 'dataset'])[summary_metric_columns]
    .mean()
    .reset_index()
)

In [168]:
# a range to iterate on prediction timesteps
targets_range = np.arange(forecast_window)

In [170]:
# vector metric (vector component to vector component)
# an array forecast_window-d: metric for 1, 2,..., no_targets step-ahead (target versus prediction for rows in dataset)

In [171]:
# mean absolute error for each step ahead, calculated over all the rows in the dataframe

# (rows, forecast_window) matrices with target and prediction values, built once
# instead of iterating over the dataframe rows for every step ahead
targets_matrix = np.array(predictions_detail_df['targets'].tolist())
predictions_matrix = np.array(predictions_detail_df['predictions'].tolist())

mae_vector = [
    # column n holds the values of all the rows for the n-th step ahead
    mean_absolute_error(targets_matrix[:, n], predictions_matrix[:, n])
    for n in targets_range
]
predictions_summary_df['mae_vector'] = [mae_vector]

In [173]:
# root mean squared error for each step ahead, calculated over all the rows in the dataframe

# (rows, forecast_window) matrices with target and prediction values, built once
# instead of iterating over the dataframe rows for every step ahead
targets_matrix = np.array(predictions_detail_df['targets'].tolist())
predictions_matrix = np.array(predictions_detail_df['predictions'].tolist())

rmse_vector = [
    # column n holds the values of all the rows for the n-th step ahead
    sqrt(mean_squared_error(targets_matrix[:, n], predictions_matrix[:, n]))
    for n in targets_range
]
predictions_summary_df['rmse_vector'] = [rmse_vector]

In [174]:
# SMAPE for each step ahead, calculated over all the rows in the dataframe

# (rows, forecast_window) matrices with target and prediction values, built once
# instead of iterating over the dataframe rows for every step ahead
targets_matrix = np.array(predictions_detail_df['targets'].tolist())
predictions_matrix = np.array(predictions_detail_df['predictions'].tolist())

smape_vector = [
    # column n holds the values of all the rows for the n-th step ahead
    smape(targets_matrix[:, n], predictions_matrix[:, n])
    for n in targets_range
]
predictions_summary_df['smape_vector'] = [smape_vector]

In [175]:
# insert count of rows as a column value, right after the three grouping columns;
# the count is taken from the predictions detail dataframe being summarized, and the guard
# keeps the cell re-runnable (DataFrame.insert raises when the column already exists)
if 'count' not in predictions_summary_df.columns:
    predictions_summary_df.insert(3, 'count', len(predictions_detail_df))

In [187]:
predictions_summary_df

Unnamed: 0,model_id,execution,dataset,count,mae,rmse,smape,mae_vector,rmse_vector,smape_vector
0,ARTRFDC_TPU_001,0,test,1927,108.406593,134.492255,0.04288,"[48.151058179205044, 69.42947142224394, 81.947...","[66.10266580277933, 92.11263279852528, 108.899...","[0.01919004221083813, 0.027874015071866175, 0...."


In [177]:
# path of the pickle file for the predictions detail dataframe, under database/predictions_detail/,
# named after the model identifier, the execution number and the dataset
detail_path_fields = (PROJECT_ROOT, 'database', 'predictions_detail', model_id, execution, dataset)
detail_pickle_path = '{}/{}/{}/{}_{:02d}_on_{}_tfrecord.pkl'.format(*detail_path_fields)

In [179]:
# write the predictions detail dataframe as a pickle file to database/predictions_detail/
predictions_detail_df.to_pickle(detail_pickle_path)

# confirm which model execution and dataset were persisted
detail_persisted_message = 'Persisted Pandas dataframe for predictions detail of {}_{:02d} on {}.tfrecord'
print(detail_persisted_message.format(model_id, execution, dataset))

Persisted Pandas dataframe for predictions detail of ARTRFDC_TPU_001_00 on test.tfrecord


In [180]:
# path of the pickle file for the predictions summary dataframe, under database/predictions_summary/,
# named after the model identifier, the execution number and the dataset
summary_path_fields = (PROJECT_ROOT, 'database', 'predictions_summary', model_id, execution, dataset)
summary_pickle_path = '{}/{}/{}/{}_{:02d}_on_{}_tfrecord.pkl'.format(*summary_path_fields)

In [181]:
# write the predictions summary dataframe as a pickle file to database/predictions_summary/
predictions_summary_df.to_pickle(summary_pickle_path)

# confirm which model execution and dataset were persisted
summary_persisted_message = 'Persisted Pandas dataframe for predictions summary of {}_{:02d} on {}.tfrecord'
print(summary_persisted_message.format(model_id, execution, dataset))

Persisted Pandas dataframe for predictions summary of ARTRFDC_TPU_001_00 on test.tfrecord


In [182]:
# how does SMAPE over the forecast window behave along the rows of the test dataset
# (as predictions move further away from the training and evaluation dataset periods)

In [184]:
# the figure is bound to smape_figure (not to smape), so the smape() metric function
# stays callable when the cells that use it are run again after this one
smape_figure = figure(
    # x_axis_type='datetime',
    plot_width=960,
    plot_height=400,
    title='SMAPE for model {}, execution {}, on {} dataset'.format(
        model_id,
        execution,
        dataset))

smape_figure.grid.grid_line_alpha=0.5

smape_figure.xaxis.axis_label = 'Row of test dataset'
smape_figure.yaxis.axis_label = 'SMAPE for the forecast window'

# one point per row of the predictions detail dataframe;
# the x values are sized from the dataframe itself so both axes always have the same length
smape_figure.line(
    np.arange(len(predictions_detail_df)),
    predictions_detail_df['smape'],
    color='red',
    legend_label='SMAPE')

# uncomment the following two lines to save plot
# output_file('smape_{}_{:02d}_on_{}.html'.format(model_id, execution, dataset))
# save(smape_figure)

# uncomment the following line to display plot
show(smape_figure)

In [189]:
# a dictionary to manage plots
plots = dict()

# plot predictions for a single row in the test dataset
# row = 266
# row = 1377
row = 24

# timestamps of the forecast window for the selected row, shared by both lines
row_timestamps = pd.to_datetime(predictions_detail_df.loc[row, 'string_timestamps'])

plots['identifier'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=400,
    title='Multi-step prediction for model {}, execution {}, on {} dataset, SMAPE = {}'.format(
        model_id,
        execution,
        dataset,
        predictions_detail_df.loc[row, 'smape']))

plots['identifier'].grid.grid_line_alpha=0.5

plots['identifier'].xaxis.axis_label = 'Timestamp'
plots['identifier'].yaxis.axis_label = 'Active Power [KW]'

# the true values (targets) are the line labelled 'real'
plots['identifier'].line(
    row_timestamps,
    predictions_detail_df.loc[row, 'targets'],
    color='green',
    legend_label='real')

# the model predictions are the line labelled 'predicted'
plots['identifier'].line(
    row_timestamps,
    predictions_detail_df.loc[row, 'predictions'],
    color='blue',
    legend_label='predicted')

# uncomment the following two lines to save plot
# output_file('prediction_{}_{:02d}_on_{}_row_{}.html'.format(model_id, execution, dataset, row))
# save(plots['identifier'])

# uncomment the following line to display plot
show(plots['identifier'])