## Recursive inference (multi-step) for BSCTRFM models

### use time series instead of SLDB arrays for easier and more efficient timestamp management

In [1]:
import os
import json
import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# uncomment the following line for compatibility with TensorFlow 1.15 (on GCP)
# import tensorflow.compat.v1 as tf
# uncomment the following line for TensorFlow 2.X (local execution)
import tensorflow as tf

# forecast model was saved in TensorFlow 1.15
# but, in order to make predictions locally, has to be loaded with TensorFlow 2
from tensorflow.saved_model import load

In [2]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
# select a palette
from bokeh.palettes import d3
output_notebook()

In [3]:
# symmetrical mean absolute percentage error
def smape(targets, predictions):
    """Return the symmetric mean absolute percentage error (SMAPE).

    Every element contributes 2*|prediction - target| / (|target| + |prediction|);
    the contributions are summed and divided by the length of the first axis.
    The result is a fraction (it is not multiplied by 100).

    Args:
        targets: array-like with the actual values.
        predictions: array-like with the predicted values (same shape as targets).

    Returns:
        The SMAPE value as a float.

    Raises:
        ValueError: if targets and predictions do not have the same shape
            (the previous implementation silently returned None in that case).
    """
    # array-likes to float NumPy arrays (scalars are promoted to one-element arrays)
    targets = np.atleast_1d(np.asarray(targets, dtype=float))
    predictions = np.atleast_1d(np.asarray(predictions, dtype=float))

    # verify predictions and targets have the same shape
    if predictions.shape != targets.shape:
        raise ValueError(
            'targets and predictions must have the same shape, got {} and {}'.format(
                targets.shape, predictions.shape))

    numerator = 2 * np.abs(predictions - targets)
    denominator = np.abs(targets) + np.abs(predictions)

    # a pair where both target and prediction are zero is a perfect match:
    # count it as zero error instead of propagating 0/0 = NaN to the whole metric
    ratios = np.divide(numerator,
                       denominator,
                       out=np.zeros_like(numerator),
                       where=denominator != 0)

    return np.sum(ratios) / predictions.shape[0]

In [4]:
# helper that encodes float values as a feature of a serialized example
def _float_feature_from_list_of_values(list_of_values):
    """Wrap a list of floats / doubles in a float_list tf.train.Feature."""
    float_list = tf.train.FloatList(value=list_of_values)
    return tf.train.Feature(float_list=float_list)

In [5]:
# converts a pair of input tensors to a feature dict, then to a serialized example,
# to pass it to the prediction function of the saved model
def input_tensors_to_serialized_example(encoder_input_float_tensor, decoder_input_float_tensor):
    """Serialize encoder and decoder inputs as a scalar string tensor.

    Args:
        encoder_input_float_tensor: tensor with the encoder input values; it must
            expose .numpy() (i.e. be an eager tensor).
        decoder_input_float_tensor: tensor with the decoder input values; same
            requirement as the encoder input.

    Returns:
        A scalar tf.string tensor holding a serialized tf.train.Example with the
        float features 'encoder_input' and 'decoder_input' (both flattened).
    """
    # first, pass the float tensors to NumPy arrays, then flatten them
    encoder_input_flat_array = encoder_input_float_tensor.numpy().flatten()
    decoder_input_flat_array = decoder_input_float_tensor.numpy().flatten()

    # second, build the protobuffer example
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'encoder_input': _float_feature_from_list_of_values(encoder_input_flat_array),
                'decoder_input': _float_feature_from_list_of_values(decoder_input_flat_array)
            }
        )
    )

    # third, serialize the example to a bytes string
    serialized_bytes = example.SerializeToString()

    # fourth, wrap the bytes directly as a scalar string tensor
    # (the bytes are NOT passed through np.array(..., dtype='S') anymore: NumPy
    # fixed-width byte strings drop trailing NUL bytes, which would truncate the
    # serialized example whenever its last encoded float is 0.0)
    return tf.convert_to_tensor(serialized_bytes, dtype=tf.string)

In [6]:
# now get the time series for the test dataset (unseen data)

In [7]:
# define an identifier string to access the preprocessed time series
identifier = 'CPE04115_H_kw_20210526212214'

In [8]:
# build the path to the directory that holds the pickled time series for this identifier
time_series_folder = '/home/developer/gcp/cbidmltsf/timeseries/{}'.format(identifier)

In [9]:
# use a dictionary to keep the code consistent with the SLDB building process
# most of the time, only ts['test'] will be used for inference
# however, ts['eval'] might also be used, as it has not really been seen by the training process
# (no training modification resulted from the evaluation stage)

In [10]:
# container for the time series dataframes, keyed by stage name
ts = {}

In [11]:
# use test set for the following operations
# (the value is both the key into `ts` and part of the pickle file name)
stage = 'test'

In [12]:
# load the pickled time series of the selected stage from the time series directory
# NOTE: read_pickle unpickles the file, so only load files from a trusted source
ts[stage] = pd.read_pickle('{}/ts_{}.pkl'.format(time_series_folder, stage))

In [13]:
# how many readings (non-null values of the scaled variable) are in the loaded time series
ts[stage]['kw_scaled'].count()

2208

In [14]:
# start and end timestamp of the loaded time series
ts[stage].index[0], ts[stage].index[-1]

(Timestamp('2018-05-01 00:00:00'), Timestamp('2018-07-31 23:00:00'))

### original positional encodings

In [17]:
# cycle lengths used to normalize each calendar component
# NOTE(review): day-of-month values go from 1 to 31 while the cycle length is 30,
# so day 31 gets the same angle as day 1; presumably this matches the encoding
# used when the training data was built - TODO confirm before changing it
hours_in_day = 24
days_in_month = 30
months_in_year = 12

# calendar components (hour, day, month) of every timestamp in the index
timestamp_hour = np.array(ts[stage].index.hour)
timestamp_day = np.array(ts[stage].index.day)
timestamp_month = np.array(ts[stage].index.month)

# angle of each component on its own cycle, in radians
hour_angle = 2*np.pi*timestamp_hour/hours_in_day
day_angle = 2*np.pi*timestamp_day/days_in_month
month_angle = 2*np.pi*timestamp_month/months_in_year

# sine/cosine positional encoding components, cast to float32
sin_hour = np.sin(hour_angle).astype(np.float32)
cos_hour = np.cos(hour_angle).astype(np.float32)
sin_day = np.sin(day_angle).astype(np.float32)
cos_day = np.cos(day_angle).astype(np.float32)
sin_month = np.sin(month_angle).astype(np.float32)
cos_month = np.cos(month_angle).astype(np.float32)

# expand the time series dataframe with the positional encoding components,
# passing every array to the dataframe as a list
# NOTE(review): the dataframe echoed further down only shows the columns of the
# 'new positional encodings' cell, which suggests this cell was not run in that
# session; running both cells adds both column sets to ts[stage] - confirm which
# set the saved model expects
encoding_columns = (
    ('sin_hour', sin_hour),
    ('cos_hour', cos_hour),
    ('sin_day', sin_day),
    ('cos_day', cos_day),
    ('sin_month', sin_month),
    ('cos_month', cos_month),
)
for column_name, component in encoding_columns:
    ts[stage][column_name] = list(component)

### new positional encodings

In [97]:
# cycle lengths used to normalize each calendar component
hours_in_day = 24
days_in_week = 7
weeks_in_year = 53

# hour of the day: 0-23
timestamp_hour_day = np.array(ts[stage].index.hour)
# day of the week: 0 (Monday) to 6 (Sunday), read from the index in one vectorized call
# (same values as calling Timestamp.weekday() on every element)
timestamp_day_week = np.array(ts[stage].index.weekday)
# ISO week of the year goes from 1 to 53; adjust it to 0-52
# DatetimeIndex.week was deprecated in pandas 1.1 and removed in pandas 2.0 in favor
# of isocalendar().week, so use the new accessor and fall back on older pandas
try:
    iso_week = ts[stage].index.isocalendar().week.to_numpy(dtype=np.int64)
except AttributeError:
    iso_week = np.asarray(ts[stage].index.week, dtype=np.int64)
week_values = iso_week - 1
timestamp_week_year = np.array(week_values)

# build arrays with positional encoding components and cast them to float32
sin_hour_day = np.sin(2*np.pi*timestamp_hour_day/hours_in_day).astype(np.float32)
cos_hour_day = np.cos(2*np.pi*timestamp_hour_day/hours_in_day).astype(np.float32)

sin_day_week = np.sin(2*np.pi*timestamp_day_week/days_in_week).astype(np.float32)
cos_day_week = np.cos(2*np.pi*timestamp_day_week/days_in_week).astype(np.float32)

sin_week_year = np.sin(2*np.pi*timestamp_week_year/weeks_in_year).astype(np.float32)
cos_week_year = np.cos(2*np.pi*timestamp_week_year/weeks_in_year).astype(np.float32)

# now expand the time series dataframe with positional encoding components
# pass the pos encoding arrays to dataframe as lists
ts[stage]['sin_hour_day'] = list(sin_hour_day)
ts[stage]['cos_hour_day'] = list(cos_hour_day)
ts[stage]['sin_day_week'] = list(sin_day_week)
ts[stage]['cos_day_week'] = list(cos_day_week)
ts[stage]['sin_week_year'] = list(sin_week_year)
ts[stage]['cos_week_year'] = list(cos_week_year)

In [98]:
ts[stage]

Unnamed: 0_level_0,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_week_year,cos_week_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-05-01 00:00:00,0.277562,0.000000,1.000000,0.781832,0.62349,0.902798,-0.430065
2018-05-01 01:00:00,0.174138,0.258819,0.965926,0.781832,0.62349,0.902798,-0.430065
2018-05-01 02:00:00,0.114769,0.500000,0.866025,0.781832,0.62349,0.902798,-0.430065
2018-05-01 03:00:00,0.099625,0.707107,0.707107,0.781832,0.62349,0.902798,-0.430065
2018-05-01 04:00:00,0.080639,0.866025,0.500000,0.781832,0.62349,0.902798,-0.430065
...,...,...,...,...,...,...,...
2018-07-31 19:00:00,0.651798,-0.965926,0.258819,0.781832,0.62349,-0.403123,-0.915146
2018-07-31 20:00:00,0.656658,-0.866025,0.500000,0.781832,0.62349,-0.403123,-0.915146
2018-07-31 21:00:00,0.691807,-0.707107,0.707107,0.781832,0.62349,-0.403123,-0.915146
2018-07-31 22:00:00,0.606559,-0.500000,0.866025,0.781832,0.62349,-0.403123,-0.915146


In [19]:
# from now on, all data required for inference will be extracted from the time series
# therefore, remove all references to SLDB datasets

In [20]:
# root directory of the local project tree; the sldbs/, timeseries/, parameters/,
# models/ and database/ paths used below are all built from it
PROJECT_ROOT = '/home/developer/gcp/cbidmltsf'

In [21]:
# during batch prediction, the model identifier is obtained via Abseil Flags
# remember this notebook is based on local execution,
# therefore the model directory must be downloaded from GS before running the notebook
model_id = 'BSCTRFM_TPU_007'

# during batch prediction, the SLDB identifier is obtained via Abseil Flags
# THE SLDB FOR INFERENCE MUST BE THE SAME USED FOR TRAINING! (THE ONE SET UP IN THE CONFIGURATION FILE)
sldb_id = 'CPE04115_H_kw_20210526212214_BSCTRFM_096_096'

# during batch prediction, the dataset name is obtained via Abseil Flags
# NOTE(review): `dataset` only labels the persisted results, while `stage` selects
# the time series that is actually read; keep both set to the same value
dataset = 'test'

# define a forecast window to guide the iterative prediction process
# start with an hourly, day-ahead process
forecast_window = 24

# ADD AN INFERENCE IDENTIFIER, BECAUSE FOR TRANSFORMER-BASED MODELS, DIFFERENT INFERENCES
# CAN BE PRODUCED FROM A SINGLE SAVED MODEL (USUALLY TO PRODUCE DIFFERENT FORECAST WINDOWS)
# during batch prediction, the inference identifier should be obtained via Abseil Flags
# (here it is the forecast window as a zero-padded, three-digit string)
inference = '{:03d}'.format(forecast_window)

In [22]:
# directory of the SLDB used by the model: <project root>/sldbs/<SLDB identifier>
sldb_path_parts = (PROJECT_ROOT, 'sldbs', sldb_id)
data_dir = '{}/{}/{}'.format(*sldb_path_parts)

# the json file in that directory describes the SLDB (and names its time series)
sldb_json_file = '{}/sldb.json'.format(data_dir)

In [23]:
# read the SLDB description from its json file into a dictionary
with open(sldb_json_file, 'r') as sldb_file:
    sldb_dict = json.loads(sldb_file.read())

In [24]:
# and get the time series identifier from the SLDB description
# NOTE(review): the time series itself was loaded earlier with the hard-coded
# `identifier`; both values are expected to match
ts_identifier = sldb_dict['ts']
ts_identifier

'CPE04115_H_kw_20210526212214'

In [25]:
# the SK-Learn scaler used to normalize the test dataset (unseen) is stored
# in the directory of the time series, so locate it with the time series identifier
scaler_path_template = '{}/{}/{}/scaler_test.save'
scaler_path = scaler_path_template.format(PROJECT_ROOT, 'timeseries', ts_identifier)
scaler_test = joblib.load(scaler_path)

print('Scaler on test dataset loaded for time series {}'.format(ts_identifier))

Scaler on test dataset loaded for time series CPE04115_H_kw_20210526212214


In [26]:
# path to the json file with the parameters of the forecasting model:
# <project root>/parameters/<model identifier>.json
config_path_parts = (PROJECT_ROOT, 'parameters', model_id)
config_json_file = '{}/{}/{}.json'.format(*config_path_parts)

# read the model configuration from the json file in parameters/
with open(config_json_file, 'r') as config_file:
    configuration = json.loads(config_file.read())

In [27]:
# retrieve the prediction function and test it with the adequate tensor-examples

In [28]:
# use model identifier and execution number to build the model directory string
# (the execution number is zero-padded to two digits)
execution = 3
model_dir = '{}_{:02d}'.format(model_id, execution)
model_dir

'BSCTRFM_TPU_007_03'

In [29]:
# main directory of the saved model: <project root>/models/<model dir>/export/exporter
saved_model_path_parts = (PROJECT_ROOT, 'models', model_dir)
saved_model_path = '{}/{}/{}/export/exporter'.format(*saved_model_path_parts)
saved_model_path

'/home/developer/gcp/cbidmltsf/models/BSCTRFM_TPU_007_03/export/exporter'

In [30]:
# get all the entries in the saved model path, to find the most recent one
all_files = os.listdir(saved_model_path)
# exports appear to be named with a numeric timestamp (e.g. '1626881237'); compare
# them as integers, because a lexicographic sort ranks '999' above '1626881237'
# and would also rank any non-numeric entry above every timestamp
timestamped_exports = [name for name in all_files if name.isdigit()]
if timestamped_exports:
    latest_saved_model_id = max(timestamped_exports, key=int)
else:
    # no purely numeric entry: keep the previous lexicographic choice
    latest_saved_model_id = sorted(all_files)[-1]
latest_saved_model_id

'1626881237'

In [31]:
# full path of the latest saved model: <saved model path>/<latest saved model id>
export_dir_parts = (saved_model_path, latest_saved_model_id)
export_dir = '{}/{}'.format(*export_dir_parts)
export_dir_message = 'Exported model path is {}'.format(export_dir)
print (export_dir_message)

Exported model path is /home/developer/gcp/cbidmltsf/models/BSCTRFM_TPU_007_03/export/exporter/1626881237


In [32]:
# load the saved model tagged 'serve' from the export directory
# and take its "serving_default" signature as the prediction function
imported = load(export_dir=export_dir, tags='serve')
predict_fn = imported.signatures["serving_default"]
predict_fn

<ConcreteFunction pruned(example_bytes) at 0x7FDBEC463FD0>

In [33]:
# test on the main loop

In [29]:
sldb_dict

{'ts': 'CPE04115_H_kw_20210526212214',
 'embedding': {'hourly': 96},
 'tau': {'hourly': 1},
 'no_targets': 96,
 'BSCTRFM': 1,
 'stats': {'train': {'n_rows': 17351},
  'eval': {'n_rows': 2688},
  'test': {'n_rows': 2017}}}

In [30]:
# m: 'hourly' embedding size from the SLDB description;
# the inference loops use it as the number of rows of the encoder input
m = sldb_dict['embedding']['hourly']

In [31]:
# t: number of targets from the SLDB description;
# every inference interval spans m + t rows of the time series
t = sldb_dict['no_targets']

### start inference process

In [135]:
# position of the target variable among the dataframe columns, used to overwrite
# decoder-input values with a single positional (non-chained) assignment
kw_position = ts[stage].columns.get_loc('kw_scaled')

for start_index in [1008]:

    # define first prediction interval with start- and end-index
    # given the interval time_series[start_index:end_index]
    # the conditioning range is the union of the encoder-input and the decoder-input
    # and the prediction range is only the last reading in the interval,
    # by means of a recursive inference process
    # on each step the last prediction is added to the decoder input
    # and the prediction range grows one step into the future

    # get the end-index of this recursive inference interval
    end_index = start_index + (m + t)

    # initialize a list to store recurrent predictions for this interval
    predictions_list = list()

    for i in range(forecast_window):

        # build the inference interval as a sub-series of the dataset
        sub_series = ts[stage][start_index + i : end_index + i]

        # important: build sources as copies of the sub-series (and therefore of the global time series)
        # to avoid overwriting the original dataset

        # the encoder input source
        encoder_input = sub_series[:m].copy()

        # the decoder input source
        decoder_input = sub_series[m-1:-1].copy()

        # on first step (i=0), the decoder input carries only true values
        # and the predictions list is empty
        # on subsequent steps, the last i values of the decoder input are replaced
        # with all previous predictions (stored in the predictions list)
        # a single .iloc assignment is used instead of decoder_input['kw_scaled'][-i:] = ...
        # because chained assignment does not write through under pandas copy-on-write
        if i > 0:
            decoder_input.iloc[-i:, kw_position] = predictions_list

        # the target source, for metrics calculation (first pass)
        target = sub_series[m:].copy()

        # build source tensors from the sub-series (a batch dimension is added)
        encoder_input_tensor = tf.expand_dims(encoder_input, axis=0)
        decoder_input_tensor = tf.expand_dims(decoder_input, axis=0)

        # make input example for the prediction function
        input_example = input_tensors_to_serialized_example(encoder_input_tensor,
                                                            decoder_input_tensor)

        # get the output of the prediction function as a dictionary
        predict_output_dict = predict_fn(input_example)

        # get the prediction output tensor
        predict_output_tensor = predict_output_dict['forecast']

        # get the most recent prediction (last timestep of the forecast)
        most_recent_prediction = predict_output_tensor[0, :, 0].numpy()[-1]

        # append the most recent prediction timestep to the predictions list
        predictions_list.append(most_recent_prediction)

        # pass the predictions list to a column array
        current_predictions_array = np.array(predictions_list).reshape(-1, 1)

        # get the targets vector to be compared with the current predictions array
        current_targets = np.array(target['kw_scaled'].iloc[-i-1:]).reshape(-1, 1)

        # calculate SMAPE on the rescaled variable
        # (recomputed on every step, so that after the loop these values describe
        # the complete forecast window; the plotting cells below rely on them)
        rescaled_predictions = scaler_test.inverse_transform(current_predictions_array)
        rescaled_targets = scaler_test.inverse_transform(current_targets)
        current_smape = smape(rescaled_targets, rescaled_predictions)
        


In [136]:
# a dictionary to manage plots
# registry of the figures built below, keyed by plot label
plots = {}

In [137]:
# conditioning range for plotting: the inference interval without its last row
inference_interval = ts[stage][start_index:end_index]
conditioning_range = inference_interval[:-1]

In [138]:
# scaled variable of the conditioning range as a column array
# (the reshape to a single column makes it compliant with the scaler)
conditioning_range_array = np.array(conditioning_range['kw_scaled']).reshape(-1, 1)

# undo the scaling to get the variable in its original units
conditioning_range_values = scaler_test.inverse_transform(conditioning_range_array)

In [139]:
# label the plot with the starting timestamp for prediction range
# (relies on `target` and `i` keeping the values of the last iteration
# of the inference loop above)
label = str(target.index[-i-1])
label

'2018-06-19 23:00:00'

In [140]:
# build the figure for this prediction range
# NOTE(review): plot_width/plot_height were replaced by width/height in Bokeh 3;
# confirm the installed Bokeh version before upgrading
forecast_title = 'Active power predictions starting on {} with SMAPE={}'.format(
    label,
    round(current_smape, 6))
forecast_figure = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=400,
    title=forecast_title
)

forecast_figure.grid.grid_line_alpha = 0.5
forecast_figure.xaxis.axis_label = 'Timestamp'
forecast_figure.yaxis.axis_label = 'Active power [KW]'

# timestamps of the prediction range, shared by the real and the predicted curves
prediction_range_index = target.index[-i-1:]

# one line per curve: x values, rescaled y values, color and legend label
curves = (
    (conditioning_range.index, conditioning_range_values, 'blue', 'conditioning'),
    (prediction_range_index, rescaled_targets, 'green', 'real'),
    (prediction_range_index, rescaled_predictions, 'red', 'predicted'),
)
for x_values, y_values, line_color, curve_label in curves:
    forecast_figure.line(
        x_values,
        np.squeeze(y_values),
        color=line_color,
        legend_label=curve_label)

forecast_figure.legend.location = 'bottom_left'

# register the figure under its label
plots[label] = forecast_figure

# to save the plot instead of displaying it, call output_file(<html path>)
# and then save(plots[label])

# display the plot
show(plots[label])

### run inference process and build databases

In [76]:
# during batch prediction, the execution identifier is obtained via Abseil Flags
for execution in [3]:
# for execution in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:    
    # a columns list for the predictions dataframe
    pred_df_columns = ['model_id',
                       'execution',
                       'dataset',
                       'inference',
                       'string_timestamps',
                       'predictions',
                       'targets']
    
    # build the predictions dataframe
    predictions_detail_df = pd.DataFrame(columns=pred_df_columns)

    # use model identifier and execution number to build the model directory string
    model_dir = '{}_{:02d}'.format(model_id, execution)

    # get the path to the saved model main directory
    saved_model_path = '{}/{}/{}/export/exporter'.format(PROJECT_ROOT,
                                                         'models',
                                                         model_dir)

    # get all the files in the saved model path, to find the most recent one
    all_files = os.listdir(saved_model_path)
    # get the path to the most recent saved model
    latest_saved_model_id = sorted(all_files)[-1]

    # build the full path for the latest saved model dir
    export_dir = '{}/{}'.format(saved_model_path, latest_saved_model_id)
    print ('Exported model path is {}'.format(export_dir))

    # load the saved model and the prediction function
    imported = load(export_dir=export_dir, tags='serve')
    predict_fn = imported.signatures["serving_default"]
    
    # iterate on a set of valid rows of the test dataset
    starting_point = 0 # based on the inference dataset
    span = 8*7*24 # number of weeks expressed in hours
    dataset_row_indexes_list = starting_point + np.arange(span)
    
    for start_index in dataset_row_indexes_list:

        # define first prediction interval with start- and end-index
        # given the interval time_series[start_index:end_index]
        # the conditioning range is the union of the encoder-input and the decoder-input
        # and the prediction range is only the last lecture in the interval,
        # by means of a recursive inference process
        # on each step the last prediction is added to the decoder input
        # and the prediction range grows one step into the future

        # get the end-index of this recursive inference interval
        end_index = start_index + (m + t)

        # initialize a list to store recurrent predictions for this interval
        predictions_list = list()

        for i in np.arange(forecast_window):

            # build the inference interval as a sub-series of the dataset
            sub_series = ts[stage][start_index + i : end_index + i]

            # important: build sources as copies of the sub-series (and therefore of the global time series)
            # to avoid overwriting the original dataset

            # the encoder input source
            encoder_input = sub_series[:m].copy()

            # the decoder input source
            decoder_input = sub_series[m-1:-1].copy()

            # on first step (i=0), the decoder input carries only true values
            # and the predictions list is empty
            # on subsequent steps, the decoder input includes all previous predictions
            # (stored in the predictions list)
            if i > 0:
                decoder_input['kw_scaled'][-i:] = predictions_list

            # the target source, for metrics calculation (first pass)
            target = sub_series[m:].copy()

            # build source tensors from the sub-series    
            encoder_input_tensor = tf.expand_dims(encoder_input, axis=0)
            decoder_input_tensor = tf.expand_dims(decoder_input, axis=0)

            # make input example for the prediction function
            input_example = input_tensors_to_serialized_example(encoder_input_tensor,
                                                                decoder_input_tensor)

            # get the output of the prediction function as a dictionary
            predict_output_dict = predict_fn(input_example)

            # get the prediction output tensor
            predict_output_tensor = predict_output_dict['forecast']

            # get the most recent prediction
            most_recent_prediction = predict_output_tensor[0, :, 0].numpy()[-1]

            # append the most recent prediction timestep to the predictions list
            predictions_list.append(most_recent_prediction)

            # pass the predictions list to an array
            # current_predictions_array = np.array(predictions_list).reshape(-1, 1)

            # get the targets vector to be compared with the current predictions array
            # current_targets = np.array(target['kw_scaled'][-i-1:]).reshape(-1, 1)

            # calculate SMAPE on the rescaled variable
            # rescaled_predictions = scaler_test.inverse_transform(current_predictions_array)
            # rescaled_targets = scaler_test.inverse_transform(current_targets)
            # current_smape = smape(rescaled_targets, rescaled_predictions)
            # print('SMAPE for the first {} rescaled prediction(s) is {}'.format(i + 1, current_smape))
        

        # iterative predictions over the forecast window reside in predictions_list
        # convert list to array, then expand feature dimension with value 1
        predicted_values = np.array(predictions_list).reshape(-1, 1)

        # inverse-scale predictions
        rescaled_predicted_values = scaler_test.inverse_transform(predicted_values)

        # and the true values remain in the prediction tensor, pass them to a NumPy array
        # for the true values array, expand feature dimension with value 1
        true_values = np.array(target['kw_scaled'][-i-1:]).reshape(-1, 1)

        # inverse-scale true values
        rescaled_true_values = scaler_test.inverse_transform(true_values)

        # a temporary dataframe built from the data in the current row
        df = pd.DataFrame(columns=pred_df_columns)
        df['model_id'] = [model_id]
        df['execution'] = [execution]
        df['dataset'] = [dataset]
        df['inference'] = [inference]
        df['string_timestamps']= [pd.to_datetime(target.index[-i-1:]).astype(str).tolist()]
        df['predictions'] = [np.squeeze(rescaled_predicted_values).tolist()]
        df['targets'] = [np.squeeze(rescaled_true_values).tolist()]
        df['mae'] = mean_absolute_error(rescaled_true_values, rescaled_predicted_values)
        df['rmse'] = sqrt(mean_squared_error(rescaled_true_values, rescaled_predicted_values))
        df['smape'] = smape(rescaled_true_values, rescaled_predicted_values)

        # append the temporary dataframe to the predictions detail dataframe
        predictions_detail_df = pd.concat([predictions_detail_df, df])

    
    # reset the index of final dataframe, once all of its rows (dataset) have been processed
    predictions_detail_df = predictions_detail_df.reset_index(drop=True)

    # build a predictions summary dataframe, reset index to avoid making a multi-column index when grouping by
    predictions_summary_df = predictions_detail_df.groupby(['model_id',
                                                            'execution',
                                                            'dataset',
                                                            'inference']).mean().reset_index()

    # a range to iterate on prediction timesteps
    targets_range = np.arange(forecast_window)

    # vector metric (vector component to vector component)
    # an array forecast_window-d: metric for 1, 2,..., no_targets step-ahead
    # (target versus prediction for rows in dataset)

    # for index, row in dataframe.iterrows()
    mae_vector = [
        mean_absolute_error(
            # a list with the n-rows target values for the n-th step ahead
            [row.targets[n] for _, row in predictions_detail_df.iterrows()],
            # a list with the n-rows prediction values for the n-th step ahead
            [row.predictions[n] for _, row in predictions_detail_df.iterrows()]
        ) for n in targets_range
    ]
    predictions_summary_df['mae_vector'] = [mae_vector]

    # for index, row in dataframe.iterrows()
    rmse_vector = [
        sqrt(mean_squared_error(
            # a list with the n-rows target values for the n-th step ahead
            [row.targets[n] for _, row in predictions_detail_df.iterrows()],
            # a list with the n-rows prediction values for the n-th step ahead
            [row.predictions[n] for _, row in predictions_detail_df.iterrows()]
        )) for n in targets_range
    ]
    predictions_summary_df['rmse_vector'] = [rmse_vector]
    
    # for index, row in dataframe.iterrows()
    smape_vector = [
        smape(
            [row.targets[n] for _, row in predictions_detail_df.iterrows()],
            [row.predictions[n] for _, row in predictions_detail_df.iterrows()]
        ) for n in targets_range
    ]
    predictions_summary_df['smape_vector'] = [smape_vector]

    # insert count of rows as a column value
    predictions_summary_df.insert(4, 'count', len(dataset_row_indexes_list))

    # build a path to persist the dataframe to database/predictions_detail/
    detail_pickle_path = '{}/{}/{}/{}_{:02d}_{}_{}.pkl'.format(
        PROJECT_ROOT,
        'database',
        'predictions_detail',
        model_id,
        execution,
        dataset,
        inference)
    
    # persist the Pandas dataframe to database/predictions_detail/
    predictions_detail_df.to_pickle(detail_pickle_path)
    print('Persisted Pandas dataframe for predictions detail of {}_{:02d}_{}_{}'.format(model_id,
                                                                                        execution,
                                                                                        dataset,
                                                                                        inference))

    # build a path to persist the dataframe to database/predictions_summary/
    summary_pickle_path = '{}/{}/{}/{}_{:02d}_{}_{}.pkl'.format(
        PROJECT_ROOT,
        'database',
        'predictions_summary',
        model_id,
        execution,
        dataset,
        inference)

    # persist the Pandas dataframe to database/predictions_summary/
    predictions_summary_df.to_pickle(summary_pickle_path)
    print('Persisted Pandas dataframe for predictions summary of {}_{:02d}_{}_{}'.format(model_id,
                                                                                         execution,
                                                                                         dataset,
                                                                                         inference))


Exported model path is /home/developer/gcp/cbidmltsf/models/BSCTRFM_TPU_007_03/export/exporter/1626881237
Persisted Pandas dataframe for predictions detail of BSCTRFM_TPU_007_03_test_024
Persisted Pandas dataframe for predictions summary of BSCTRFM_TPU_007_03_test_024


In [77]:
predictions_detail_df

Unnamed: 0,model_id,execution,dataset,inference,string_timestamps,predictions,targets,mae,rmse,smape
0,BSCTRFM_TPU_007,3,test,024,"[2018-05-08 23:00:00, 2018-05-09 00:00:00, 201...","[2378.643310546875, 2146.348876953125, 1968.90...","[2419.4633333333336, 2126.8216666666667, 1940....",102.278721,137.730518,0.038545
1,BSCTRFM_TPU_007,3,test,024,"[2018-05-09 00:00:00, 2018-05-09 01:00:00, 201...","[2176.59765625, 1969.978515625, 1882.275390625...","[2126.8216666666667, 1940.4016666666666, 1822....",118.202064,152.906036,0.045489
2,BSCTRFM_TPU_007,3,test,024,"[2018-05-09 01:00:00, 2018-05-09 02:00:00, 201...","[1954.02587890625, 1852.53515625, 1774.8841552...","[1940.4016666666666, 1822.6733333333334, 1760....",112.804435,146.038192,0.043060
3,BSCTRFM_TPU_007,3,test,024,"[2018-05-09 02:00:00, 2018-05-09 03:00:00, 201...","[1847.3477783203125, 1777.1114501953125, 1772....","[1822.6733333333334, 1760.7300000000002, 1771....",125.876507,156.162885,0.049012
4,BSCTRFM_TPU_007,3,test,024,"[2018-05-09 03:00:00, 2018-05-09 04:00:00, 201...","[1746.19580078125, 1728.02734375, 1758.4621582...","[1760.7300000000002, 1771.82, 1800.32833333333...",134.632241,162.222237,0.054026
...,...,...,...,...,...,...,...,...,...,...
1339,BSCTRFM_TPU_007,3,test,024,"[2018-07-03 18:00:00, 2018-07-03 19:00:00, 201...","[2774.773681640625, 2708.8994140625, 2774.5773...","[2767.161666666667, 2700.1116666666667, 2753.6...",85.767777,111.149526,0.033671
1340,BSCTRFM_TPU_007,3,test,024,"[2018-07-03 19:00:00, 2018-07-03 20:00:00, 201...","[2717.447021484375, 2762.351318359375, 2890.49...","[2700.1116666666667, 2753.691666666667, 2784.0...",98.316329,124.204578,0.037792
1341,BSCTRFM_TPU_007,3,test,024,"[2018-07-03 20:00:00, 2018-07-03 21:00:00, 201...","[2775.10498046875, 2877.038330078125, 2748.462...","[2753.691666666667, 2784.035, 2622.09166666666...",84.292012,107.895303,0.031966
1342,BSCTRFM_TPU_007,3,test,024,"[2018-07-03 21:00:00, 2018-07-03 22:00:00, 201...","[2874.1748046875, 2757.48779296875, 2477.81665...","[2784.035, 2622.0916666666667, 2386.8699999999...",116.545309,150.089705,0.043726


In [78]:
predictions_summary_df

Unnamed: 0,model_id,execution,dataset,inference,count,mae,rmse,smape,mae_vector,rmse_vector,smape_vector
0,BSCTRFM_TPU_007,3,test,24,1344,107.90304,131.786428,0.042043,"[54.83785152132548, 74.10572507464697, 83.5965...","[76.43703977552606, 100.92699177539322, 114.29...","[0.021285578043153314, 0.028905104182779577, 0..."


In [79]:
# a dictionary to manage plots
# registry of the figures built below, keyed by plot name
plots = {}

In [80]:
# NOTE(review): plot_width/plot_height were replaced by width/height in Bokeh 3;
# confirm the installed Bokeh version before upgrading
plots['smape'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=400,
    title='SMAPE for model {}, execution {}, on {} dataset'.format(
        model_id,
        execution,
        dataset))

plots['smape'].grid.grid_line_alpha=0.5

plots['smape'].xaxis.axis_label = 'Starting timestamp of the forecast window'
plots['smape'].yaxis.axis_label = 'SMAPE for the forecast window'

# x axis: the first forecasted timestamp of every window, read from the detail dataframe
# (ts[stage].index[dataset_row_indexes_list] is the start of the conditioning
# range, which precedes the forecast window, so it does not match the axis label)
forecast_start_timestamps = pd.to_datetime(
    [timestamps[0] for timestamps in predictions_detail_df['string_timestamps']])

plots['smape'].line(
    # replace the first argument with dataset_row_indexes_list (and remove
    # x_axis_type above) to plot SMAPE against the starting dataset index
    forecast_start_timestamps,
    predictions_detail_df['smape'],
    color='red',
    legend_label='SMAPE')

# to save the plot instead of displaying it, call output_file(<html path>)
# and then save(plots['smape'])

# display the plot
show(plots['smape'])