## Recursive inference (multi-step) for BSCTRFM models

### use time series instead of SLDB arrays for easier and more efficient timestamp management

In [1]:
import os
import json
import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# uncomment the following line for compatibility with TensorFlow 1.15 (on GCP)
# import tensorflow.compat.v1 as tf
# uncomment the following line for TensorFlow 2.X (local execution)
import tensorflow as tf

# forecast model was saved in TensorFlow 1.15
# but, in order to make predictions locally, has to be loaded with TensorFlow 2
from tensorflow.saved_model import load

In [2]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
# select a palette
from bokeh.palettes import d3
output_notebook()

In [3]:
# symmetrical mean absolute percentage error
def smape(targets, predictions):
    '''
    Symmetrical mean absolute percentage error (SMAPE).

    Every term is 2*|prediction - target| / (|target| + |prediction|);
    the terms are added up and divided by the length of the first axis.

    targets: a list (or array) with the actual values
    predictions: a list (or array) with the predicted values,
                 with the same shape as targets

    A term whose target and prediction are both zero contributes 0
    (a perfect forecast) instead of the NaN produced by 0/0.

    Raises ValueError when predictions and targets have different shapes.
    '''
    # lists to NumPy float arrays
    targets = np.asarray(targets, dtype=float)
    predictions = np.asarray(predictions, dtype=float)

    # fail loudly on a shape mismatch, instead of silently returning None
    if predictions.shape != targets.shape:
        raise ValueError(
            'predictions and targets must have the same shape, got {} and {}'.format(
                predictions.shape, targets.shape))

    numerator = 2 * np.abs(predictions - targets)
    denominator = np.abs(targets) + np.abs(predictions)
    # divide only where the denominator is not zero; remaining terms stay at 0
    terms = np.divide(numerator,
                      denominator,
                      out=np.zeros_like(numerator),
                      where=denominator != 0)

    return np.sum(terms) / predictions.shape[0]

In [4]:
# a function to encode float values for serialized examples
def _float_feature_from_list_of_values(list_of_values):
    """Wrap a sequence of floats / doubles as a tf.train.Feature holding a FloatList."""
    float_list = tf.train.FloatList(value=list_of_values)
    return tf.train.Feature(float_list=float_list)

In [5]:
# converts a set of tensors into a feature dict, and then into a serialized example,
# to be passed to the prediction function of the saved model
def input_tensors_to_serialized_example(encoder_input_float_tensor,
                                        decoder_input_float_tensor,
                                        id_float_tensor):
    """Pack the three model inputs into one serialized tf.train.Example.

    Each tensor is converted with .numpy() (so it must be an eager tensor),
    flattened, and stored as a float-list feature under the keys
    'encoder_input', 'decoder_input' and 'id'.

    Returns a string tensor wrapping the serialized example.
    """
    # map every feature key to its source tensor
    tensors_by_key = {
        'encoder_input': encoder_input_float_tensor,
        'decoder_input': decoder_input_float_tensor,
        'id': id_float_tensor,
    }

    # flatten each tensor and encode it as a float-list feature
    feature_map = {
        key: _float_feature_from_list_of_values(tensor.numpy().flatten())
        for key, tensor in tensors_by_key.items()
    }

    # build the protobuffer example and serialize it to a byte string
    proto = tf.train.Example(features=tf.train.Features(feature=feature_map))
    payload = proto.SerializeToString()

    # wrap the bytes as a NumPy-string array, then as a string tensor
    return tf.convert_to_tensor(np.array(payload, dtype='S'))

In [6]:
# now get the time series for the test dataset (unseen data)

In [7]:
# define an identifier string to access the preprocessed time series
identifier = 'LD2011-2014'

In [8]:
# build the time series directory
time_series_folder = '/home/developer/gcp/cbidmltsf/timeseries/{}'.format(identifier)

In [9]:
# use a dictionary to keep the code consistent with the SLDB building process
# most of the time, only ts['test'] will be used for inference
# however, ts['eval'] might also be used, as it has not really been seen by the training process
# (no training modification resulted from the evaluation stage)

In [10]:
ts = dict()

In [11]:
# use test set for the following operations
stage = 'test'

In [12]:
# the time series filename
filename = 'MT320_{}'.format(stage)

In [13]:
'{}/{}.pkl'.format(time_series_folder, filename)

'/home/developer/gcp/cbidmltsf/timeseries/LD2011-2014/MT320_test.pkl'

In [14]:
ts[stage] = pd.read_pickle('{}/{}.pkl'.format(time_series_folder, filename))

In [15]:
ts[stage]

Unnamed: 0,date,token_id,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_day_month,cos_day_month,sin_day_year,cos_day_year
9105368,2014-08-18 01:00:00,320,0.410926,0.258819,0.965926,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
9105369,2014-08-18 02:00:00,320,0.365127,0.500000,0.866025,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
9105370,2014-08-18 03:00:00,320,0.331876,0.707107,0.707107,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
9105371,2014-08-18 04:00:00,320,0.311730,0.866025,0.500000,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
9105372,2014-08-18 05:00:00,320,0.301258,0.965926,0.258819,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
...,...,...,...,...,...,...,...,...,...,...,...
9105866,2014-09-07 19:00:00,320,0.863875,-0.965926,0.258819,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
9105867,2014-09-07 20:00:00,320,0.877647,-0.866025,0.500000,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
9105868,2014-09-07 21:00:00,320,0.751592,-0.707107,0.707107,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
9105869,2014-09-07 22:00:00,320,0.692665,-0.500000,0.866025,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543


In [16]:
# for consistency, rename the column date as timestamp
ts[stage] = ts[stage].rename(columns={"date": "timestamp"})

In [17]:
# for consistency, use the timestamp column as the index
ts[stage] = ts[stage].set_index('timestamp')

In [18]:
ts[stage]

Unnamed: 0_level_0,token_id,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_day_month,cos_day_month,sin_day_year,cos_day_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-08-18 01:00:00,320,0.410926,0.258819,0.965926,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 02:00:00,320,0.365127,0.500000,0.866025,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 03:00:00,320,0.331876,0.707107,0.707107,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 04:00:00,320,0.311730,0.866025,0.500000,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 05:00:00,320,0.301258,0.965926,0.258819,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
...,...,...,...,...,...,...,...,...,...,...
2014-09-07 19:00:00,320,0.863875,-0.965926,0.258819,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
2014-09-07 20:00:00,320,0.877647,-0.866025,0.500000,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
2014-09-07 21:00:00,320,0.751592,-0.707107,0.707107,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
2014-09-07 22:00:00,320,0.692665,-0.500000,0.866025,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543


### review the limits of the time series used to produce the test dataset

In [19]:
ts['test'].index[0], ts['test'].index[-1]

(Timestamp('2014-08-18 01:00:00'), Timestamp('2014-09-07 23:00:00'))

In [20]:
m, t = 168, 168

In [21]:
# build the first sub-series (first row of test SLDB)
# the first reading in this sub-series is the first reading in ts['test']
# the last reading in this sub-series is the first reading in unseen data
row = 0
ts['test'][row:row+m+t]

Unnamed: 0_level_0,token_id,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_day_month,cos_day_month,sin_day_year,cos_day_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-08-18 01:00:00,320,0.410926,0.258819,0.965926,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 02:00:00,320,0.365127,0.500000,0.866025,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 03:00:00,320,0.331876,0.707107,0.707107,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 04:00:00,320,0.311730,0.866025,0.500000,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 05:00:00,320,0.301258,0.965926,0.258819,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
...,...,...,...,...,...,...,...,...,...,...
2014-08-31 20:00:00,320,0.692665,-0.866025,0.500000,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 21:00:00,320,0.677332,-0.707107,0.707107,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 22:00:00,320,0.595795,-0.500000,0.866025,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 23:00:00,320,0.510218,-0.258819,0.965926,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961


In [22]:
# build the last sub-series (last row of test SLDB)
# the last reading in this sub-series is the last reading in unseen data
row = 167
ts['test'][row:row+m+t]

Unnamed: 0_level_0,token_id,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_day_month,cos_day_month,sin_day_year,cos_day_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-08-25 00:00:00,320,0.396759,0.000000,1.000000,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 01:00:00,320,0.381225,0.258819,0.965926,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 02:00:00,320,0.371574,0.500000,0.866025,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 03:00:00,320,0.364089,0.707107,0.707107,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 04:00:00,320,0.343153,0.866025,0.500000,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
...,...,...,...,...,...,...,...,...,...,...
2014-09-07 19:00:00,320,0.863875,-0.965926,0.258819,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
2014-09-07 20:00:00,320,0.877647,-0.866025,0.500000,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
2014-09-07 21:00:00,320,0.751592,-0.707107,0.707107,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543
2014-09-07 22:00:00,320,0.692665,-0.500000,0.866025,-0.781831,0.62349,0.994522,0.104528,-0.917584,-0.397543


### and the time series to produce the test SLDB is now verified

In [23]:
# from now on, all data required for inference will be extracted from the time series
# therefore, remove all references to SLDB datasets and TFRecord files

In [24]:
PROJECT_ROOT = '/home/developer/gcp/cbidmltsf'

In [25]:
# during batch prediction, the model identifier is obtained via Abseil Flags
# remember this notebook is based on local execution,
# therefore model directory must be downloaded from GS before running the notebook
model_id = 'BSCTRFM_TPU_014'

In [26]:
# during batch prediction, the SLDB identifier is obtained via Abseil Flags
# THE SLDB FOR INFERENCE MUST BE THE SAME USED FOR TRAINING! (THE ONE SETUP IN THE CONFIGURATION FILE)
sldb_id = 'LD2011-2014_MT320-MT330_BSCTRFM_168_168'

In [27]:
# during batch prediction, the dataset name is obtained via Abseil Flags
dataset = 'test'

In [28]:
# define a forecast window to guide the iterative prediction process
# start with an hourly, day-ahead process
forecast_window = 24

In [29]:
# ADD AN INFERENCE IDENTIFIER, BECAUSE FOR TRANSFORMER-BASED MODELS, DIFFERENT INFERENCES
# CAN BE PRODUCED FROM A SINGLE SAVED MODEL (USUALLY TO PRODUCE DIFFERENT FORECAST WINDOWS)
# during batch prediction, the inference identifier should be obtained via Abseil Flags
inference = '{:03d}'.format(forecast_window)

In [30]:
# build a path to the SLDB json file
data_dir = '{}/{}/{}'.format(PROJECT_ROOT, 'sldbs', sldb_id)

# then get the ts_identifier from the json file in the sldb directory
sldb_json_file = '{}/sldb.json'.format(data_dir)

In [31]:
# open the json file
with open(sldb_json_file, 'r') as inputfile:
    sldb_dict = json.load(inputfile)

In [32]:
# and get the time series identifier
ts_identifier = sldb_dict['ts']
ts_identifier

'LD2011-2014_MT320-MT330'

In [33]:
# load the scaler for MT_320

In [35]:
min_max_scaler = joblib.load('{}/scalers/MT_320_min_max.save'.format(data_dir))

In [36]:
# get the SLDB parameters for the forecasting model
config_json_file = '{}/{}/{}.json'.format(PROJECT_ROOT,
                                          'parameters',
                                          model_id)

# recover the model configuration dictionary from the json file in parameters/
with open(config_json_file, 'r') as inputfile:
    configuration = json.load(inputfile)

### jump to global predictions!

In [37]:
# retrieve the prediction function and test it with the adequate tensor-examples

In [38]:
# use model identifier and execution number to build the model directory string
execution = 4
model_dir = '{}_{:02d}'.format(model_id, execution)
model_dir

'BSCTRFM_TPU_014_04'

In [39]:
# get the path to the saved model main directory
saved_model_path = '{}/{}/{}/export/exporter'.format(PROJECT_ROOT,
                                                     'models',
                                                     model_dir)
saved_model_path

'/home/developer/gcp/cbidmltsf/models/BSCTRFM_TPU_014_04/export/exporter'

In [40]:
# get all the entries in the saved model path, to find the most recent one
all_files = os.listdir(saved_model_path)
# the exported model directories seen here are named with an all-digit identifier
# (presumably the export Unix timestamp); keep only all-digit names, so that any other
# entry in the directory (e.g. a temporary or hidden file) is never selected,
# and compare them as integers rather than as strings
latest_saved_model_id = max((name for name in all_files if name.isdigit()), key=int)
latest_saved_model_id

'1631533140'

In [41]:
# build the full path for the latest saved model dir
export_dir = '{}/{}'.format(saved_model_path, latest_saved_model_id)
print ('Exported model path is {}'.format(export_dir))

Exported model path is /home/developer/gcp/cbidmltsf/models/BSCTRFM_TPU_014_04/export/exporter/1631533140


In [42]:
# load the saved model and the prediction function
imported = load(export_dir=export_dir, tags='serve')
predict_fn = imported.signatures["serving_default"]
predict_fn

<ConcreteFunction pruned(example_bytes) at 0x7FAF64469E50>

In [43]:
# test on the main loop

In [44]:
sldb_dict

{'ts': 'LD2011-2014_MT320-MT330',
 'embedding': {'hourly': 168},
 'tau': {'hourly': 1},
 'no_targets': 168,
 'BSCTRFM': 1,
 'stats': {'train': {'n_rows': 54043},
  'eval': {'n_rows': 2739},
  'test': {'n_rows': 1848}}}

In [45]:
m = sldb_dict['embedding']['hourly']
m

168

In [46]:
t = sldb_dict['no_targets']
t

168

In [47]:
# verify the values of the variables for batch inference
model_id, dataset, inference

('BSCTRFM_TPU_014', 'test', '024')

### run step by step before using the iterative cycle

In [48]:
# a columns list for the predictions dataframe
pred_df_columns = ['model_id',
                   'execution',
                   'dataset',
                   'inference',
                   'string_timestamps',
                   'predictions',
                   'targets']

In [49]:
# build the predictions dataframe
predictions_detail_df = pd.DataFrame(columns=pred_df_columns)

In [50]:
# iterate on a set of valid sub-series of the test dataset (as a time series)
starting_point = 0 # based on the inference dataset

In [51]:
# IMPORTANT!!!
# span: how many rolling windows will be used for 24-hour forecasting results

# remember:
# for each sub-series or row a 24-hour forecasting interval will be calculated
# the first prediction is direct, obtained from the saved model over the feature vectors
# (encoder input, decoder input, id)
# the following 23 predictions are iterative, not direct!

# then, the first rolling window is calculated from the first sub-series
# and the last rolling window is calculated from sub-series 144 (the 145th one), as follows:

# sub-series 0
# 2014-08-18 01:00:00 to 2014-09-01 00:00:00
# builds a 24-hour forecasting from 2014-09-01 00:00:00 to 2014-09-01 23:00:00

# sub-series 24
# 2014-08-19 01:00:00 to 2014-09-02 00:00:00
# builds a 24-hour forecasting from 2014-09-02 00:00:00 to 2014-09-02 23:00:00

# sub-series 48
# 2014-08-20 01:00:00 to 2014-09-03 00:00:00
# builds a 24-hour forecasting from 2014-09-03 00:00:00 to 2014-09-03 23:00:00

# sub-series 72
# 2014-08-21 01:00:00 to 2014-09-04 00:00:00
# builds a 24-hour forecasting from 2014-09-04 00:00:00 to 2014-09-04 23:00:00

# sub-series 96
# 2014-08-22 01:00:00 to 2014-09-05 00:00:00
# builds a 24-hour forecasting from 2014-09-05 00:00:00 to 2014-09-05 23:00:00

# sub-series 120
# 2014-08-23 01:00:00 to 2014-09-06 00:00:00
# builds a 24-hour forecasting from 2014-09-06 00:00:00 to 2014-09-06 23:00:00

# sub-series 144
# 2014-08-24 01:00:00 to 2014-09-07 00:00:00
# builds a 24-hour forecasting from 2014-09-07 00:00:00 to 2014-09-07 23:00:00

span = 1 + 6*24 # six days expressed in hours, plus one: row indexes 0 to 144, inclusive
# consecutive row indexes (one per sub-series), offset by the starting point
dataset_row_indexes_list = starting_point + np.arange(span)

In [52]:
dataset_row_indexes_list

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144])

In [53]:
encoder_input_columns = [
    'kw_scaled',
    'sin_hour_day',
    'cos_hour_day',
    'sin_day_week',
    'cos_day_week',
    'sin_day_month',
    'cos_day_month',
    'sin_day_year',
    'cos_day_year'
]

In [54]:
decoder_input_columns = encoder_input_columns

In [55]:
id_columns = ['token_id']

### detailed test for a single step starts here...

In [56]:
# make a first prediction on the first sub-series

In [57]:
start_index = 0

In [58]:
end_index = start_index + m + t

In [59]:
predictions_list = list()

In [60]:
# counter on the forecast window timestep
i = 0

In [61]:
sub_series = ts[stage][start_index + i: end_index + i]
sub_series

Unnamed: 0_level_0,token_id,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_day_month,cos_day_month,sin_day_year,cos_day_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-08-18 01:00:00,320,0.410926,0.258819,0.965926,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 02:00:00,320,0.365127,0.500000,0.866025,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 03:00:00,320,0.331876,0.707107,0.707107,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 04:00:00,320,0.311730,0.866025,0.500000,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 05:00:00,320,0.301258,0.965926,0.258819,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
...,...,...,...,...,...,...,...,...,...,...
2014-08-31 20:00:00,320,0.692665,-0.866025,0.500000,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 21:00:00,320,0.677332,-0.707107,0.707107,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 22:00:00,320,0.595795,-0.500000,0.866025,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 23:00:00,320,0.510218,-0.258819,0.965926,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961


In [62]:
encoder_input = sub_series[encoder_input_columns][:m].copy()
encoder_input

Unnamed: 0_level_0,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_day_month,cos_day_month,sin_day_year,cos_day_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-08-18 01:00:00,0.410926,0.258819,0.965926,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 02:00:00,0.365127,0.500000,0.866025,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 03:00:00,0.331876,0.707107,0.707107,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 04:00:00,0.311730,0.866025,0.500000,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
2014-08-18 05:00:00,0.301258,0.965926,0.258819,0.000000,1.00000,-0.587785,-0.809017,-0.729558,-0.683919
...,...,...,...,...,...,...,...,...,...
2014-08-24 20:00:00,0.832267,-0.866025,0.500000,-0.781831,0.62349,-0.951057,0.309017,-0.796183,-0.605056
2014-08-24 21:00:00,0.726583,-0.707107,0.707107,-0.781831,0.62349,-0.951057,0.309017,-0.796183,-0.605056
2014-08-24 22:00:00,0.690250,-0.500000,0.866025,-0.781831,0.62349,-0.951057,0.309017,-0.796183,-0.605056
2014-08-24 23:00:00,0.538478,-0.258819,0.965926,-0.781831,0.62349,-0.951057,0.309017,-0.796183,-0.605056


In [63]:
decoder_input = sub_series[decoder_input_columns][m-1:m-1+t].copy()
decoder_input

Unnamed: 0_level_0,kw_scaled,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week,sin_day_month,cos_day_month,sin_day_year,cos_day_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-08-25 00:00:00,0.396759,0.000000,1.000000,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 01:00:00,0.381225,0.258819,0.965926,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 02:00:00,0.371574,0.500000,0.866025,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 03:00:00,0.364089,0.707107,0.707107,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
2014-08-25 04:00:00,0.343153,0.866025,0.500000,0.000000,1.00000,-0.866025,0.500000,-0.806480,-0.591261
...,...,...,...,...,...,...,...,...,...
2014-08-31 19:00:00,0.769356,-0.965926,0.258819,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 20:00:00,0.692665,-0.866025,0.500000,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 21:00:00,0.677332,-0.707107,0.707107,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961
2014-08-31 22:00:00,0.595795,-0.500000,0.866025,-0.781831,0.62349,0.207912,0.978148,-0.863142,-0.504961


In [64]:
id_input = sub_series[id_columns][:1].copy()
id_input

Unnamed: 0_level_0,token_id
timestamp,Unnamed: 1_level_1
2014-08-18 01:00:00,320


In [75]:
# the target source, for metrics calculation
# the first part of the sub-series is the encoder input, and
# the second part of the sub-series is the target (only the variable column!)
target = sub_series['kw_scaled'][m:].copy()
target

timestamp
2014-08-25 01:00:00    0.381225
2014-08-25 02:00:00    0.371574
2014-08-25 03:00:00    0.364089
2014-08-25 04:00:00    0.343153
2014-08-25 05:00:00    0.341543
                         ...   
2014-08-31 20:00:00    0.692665
2014-08-31 21:00:00    0.677332
2014-08-31 22:00:00    0.595795
2014-08-31 23:00:00    0.510218
2014-09-01 00:00:00    0.428456
Name: kw_scaled, Length: 168, dtype: float64

In [65]:
encoder_input_tensor = tf.expand_dims(encoder_input, axis=0)
encoder_input_tensor

<tf.Tensor: shape=(1, 168, 9), dtype=float64, numpy=
array([[[ 0.41092572,  0.25881905,  0.96592583, ..., -0.80901699,
         -0.72955755, -0.68391942],
        [ 0.36512689,  0.5       ,  0.8660254 , ..., -0.80901699,
         -0.72955755, -0.68391942],
        [ 0.33187646,  0.70710678,  0.70710678, ..., -0.80901699,
         -0.72955755, -0.68391942],
        ...,
        [ 0.69025024, -0.5       ,  0.8660254 , ...,  0.30901699,
         -0.79618286, -0.60505607],
        [ 0.53847826, -0.25881905,  0.96592583, ...,  0.30901699,
         -0.79618286, -0.60505607],
        [ 0.39675947,  0.        ,  1.        , ...,  0.5       ,
         -0.80647995, -0.59126144]]])>

In [66]:
decoder_input_tensor = tf.expand_dims(decoder_input, axis=0)
decoder_input_tensor

<tf.Tensor: shape=(1, 168, 9), dtype=float64, numpy=
array([[[ 0.39675947,  0.        ,  1.        , ...,  0.5       ,
         -0.80647995, -0.59126144],
        [ 0.3812249 ,  0.25881905,  0.96592583, ...,  0.5       ,
         -0.80647995, -0.59126144],
        [ 0.37157414,  0.5       ,  0.8660254 , ...,  0.5       ,
         -0.80647995, -0.59126144],
        ...,
        [ 0.67733159, -0.70710678,  0.70710678, ...,  0.9781476 ,
         -0.86314213, -0.50496105],
        [ 0.5957952 , -0.5       ,  0.8660254 , ...,  0.9781476 ,
         -0.86314213, -0.50496105],
        [ 0.51021821, -0.25881905,  0.96592583, ...,  0.9781476 ,
         -0.86314213, -0.50496105]]])>

In [67]:
id_tensor = tf.expand_dims(id_input, axis=0)
id_tensor

<tf.Tensor: shape=(1, 1, 1), dtype=int64, numpy=array([[[320]]])>

In [68]:
# make input example for the prediction function
input_example = input_tensors_to_serialized_example(encoder_input_tensor,
                                                    decoder_input_tensor,
                                                    id_tensor)
input_example

<tf.Tensor: shape=(), dtype=string, numpy=b'\n\x86_\n\xb8/\n\rencoder_input\x12\xa6/\x12\xa3/\n\xa0/\xdbd\xd2>\xee\x83\x84>\xeaFw?\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbf\xe9\xf1\xba>\x00\x00\x00?\xd7\xb3]?\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbf\xb6\xeb\xa9>\xf3\x045?\xf3\x045?\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbf\r\x9b\x9f>\xd7\xb3]?\x00\x00\x00?\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbf\x7f>\x9a>\xeaFw?\xee\x83\x84>\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbfa>\x11?\x00\x00\x80?21\x8d$\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbfJ\x11\xf7>\xeaFw?\xee\x83\x84\xbe\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbf\xd4\xd4\xf8>\xd7\xb3]?\x00\x00\x00\xbf\x00\x00\x00\x00\x00\x00\x80?\x18y\x16\xbf\xbd\x1bO\xbfI\xc4:\xbfX\x15/\xbf\xeb\xda"?\xf3\x045?\xf3\x045\xbf\x00\

In [69]:
# get the output of the prediction function as a dictionary
predict_output_dict = predict_fn(input_example)
predict_output_dict

{'forecast': <tf.Tensor: shape=(1, 168, 1), dtype=float32, numpy=
 array([[[0.40028378],
         [0.3769824 ],
         [0.35605752],
         [0.326269  ],
         [0.32296124],
         [0.55946195],
         [0.5453562 ],
         [0.51439494],
         [0.64813906],
         [0.72489256],
         [0.7814641 ],
         [0.796474  ],
         [0.8100412 ],
         [0.83003163],
         [0.8531544 ],
         [0.882726  ],
         [0.8854347 ],
         [0.9272934 ],
         [0.8826736 ],
         [0.8110722 ],
         [0.78030956],
         [0.6582955 ],
         [0.5507602 ],
         [0.47450253],
         [0.48714185],
         [0.45515937],
         [0.4528046 ],
         [0.35588014],
         [0.31910914],
         [0.5363588 ],
         [0.5161942 ],
         [0.5323113 ],
         [0.64291584],
         [0.7592131 ],
         [0.7987156 ],
         [0.88789   ],
         [0.8652852 ],
         [0.87763035],
         [0.8857957 ],
         [0.9001374 ],
         [0.91

In [70]:
# get the prediction output tensor
predict_output_tensor = predict_output_dict['forecast']
predict_output_tensor

<tf.Tensor: shape=(1, 168, 1), dtype=float32, numpy=
array([[[0.40028378],
        [0.3769824 ],
        [0.35605752],
        [0.326269  ],
        [0.32296124],
        [0.55946195],
        [0.5453562 ],
        [0.51439494],
        [0.64813906],
        [0.72489256],
        [0.7814641 ],
        [0.796474  ],
        [0.8100412 ],
        [0.83003163],
        [0.8531544 ],
        [0.882726  ],
        [0.8854347 ],
        [0.9272934 ],
        [0.8826736 ],
        [0.8110722 ],
        [0.78030956],
        [0.6582955 ],
        [0.5507602 ],
        [0.47450253],
        [0.48714185],
        [0.45515937],
        [0.4528046 ],
        [0.35588014],
        [0.31910914],
        [0.5363588 ],
        [0.5161942 ],
        [0.5323113 ],
        [0.64291584],
        [0.7592131 ],
        [0.7987156 ],
        [0.88789   ],
        [0.8652852 ],
        [0.87763035],
        [0.8857957 ],
        [0.9001374 ],
        [0.9149616 ],
        [0.91475666],
        [0.8864504 ],
 

In [71]:
# get the most recent prediction
most_recent_prediction = predict_output_tensor[0, :, 0].numpy()[-1]
most_recent_prediction

0.40008524

In [72]:
# append the most recent prediction timestep to the predictions list
predictions_list.append(most_recent_prediction)
predictions_list

[0.40008524]

### detailed test for a single step ends here...

### detailed test for a single row/sub-series starts here...

In [230]:
start_index, end_index, forecast_window

(0, 336, 24)

In [234]:
# recursive (multi-step) inference over a single row/sub-series:
# every step shifts the sub-series one timestep ahead, asks the saved model for a forecast,
# keeps only the last timestep of that forecast, and feeds it back on the next step

# re-initialize the predictions list for each row/sub-series
predictions_list = list()

for i in range(forecast_window):

    # slide the (m + t)-long window i timesteps ahead of the starting row
    sub_series = ts[stage][start_index + i: end_index + i]
    encoder_input = sub_series[encoder_input_columns][:m].copy()
    decoder_input = sub_series[decoder_input_columns][m-1:m-1+t].copy()

    # on the first step of the forecast window (i = 0),
    # the decoder input carries only true values and the predictions list is empty
    # on subsequent steps, the decoder input includes all previous predictions (stored in the predictions list)

    # in other words, a predictions list of size i replaces the last i readings
    # of the variable in the decoder input

    if i > 0:
        # a single positional assignment on the copy; the chained form
        # decoder_input['kw_scaled'][-i:] = ... does not update decoder_input
        # when pandas Copy-on-Write is active, which would silently feed
        # true values (instead of predictions) to the decoder
        decoder_input.iloc[-i:, decoder_input.columns.get_loc('kw_scaled')] = predictions_list

    id_input = sub_series[id_columns][:1].copy()
    # the target source, for metrics calculation (only the variable column)
    target = sub_series['kw_scaled'][m:].copy()

    # add a leading batch dimension of size 1 to every input
    encoder_input_tensor = tf.expand_dims(encoder_input, axis=0)
    decoder_input_tensor = tf.expand_dims(decoder_input, axis=0)
    id_tensor = tf.expand_dims(id_input, axis=0)

    input_example = input_tensors_to_serialized_example(encoder_input_tensor,
                                                        decoder_input_tensor,
                                                        id_tensor)

    predict_output_dict = predict_fn(input_example)
    predict_output_tensor = predict_output_dict['forecast']
    # only the last timestep of the forecast is new; keep it
    most_recent_prediction = predict_output_tensor[0, :, 0].numpy()[-1]
    predictions_list.append(most_recent_prediction)

    current_predictions_array = np.array(predictions_list).reshape(-1, 1)
    # get the targets vector to be compared with the current predictions array
    # (the last i + 1 targets of the shifted sub-series line up with the i + 1 predictions)
    current_targets = np.array(target[-i-1:]).reshape(-1, 1)

    # calculate SMAPE on the rescaled variable
    rescaled_predictions = min_max_scaler.inverse_transform(current_predictions_array)
    rescaled_targets = min_max_scaler.inverse_transform(current_targets)

    current_smape = smape(rescaled_targets, rescaled_predictions)
    print('On row {}, SMAPE for the first {} rescaled prediction(s) is {}'.format(start_index,
                                                                                  i + 1,
                                                                                  current_smape))

On row 0, SMAPE for the first 1 rescaled prediction(s) is 0.06564676984725311
On row 0, SMAPE for the first 2 rescaled prediction(s) is 0.036412821156947
On row 0, SMAPE for the first 3 rescaled prediction(s) is 0.048865858200501565
On row 0, SMAPE for the first 4 rescaled prediction(s) is 0.03928444445051047
On row 0, SMAPE for the first 5 rescaled prediction(s) is 0.04582564134300999
On row 0, SMAPE for the first 6 rescaled prediction(s) is 0.0520188362560172
On row 0, SMAPE for the first 7 rescaled prediction(s) is 0.05227517516613175
On row 0, SMAPE for the first 8 rescaled prediction(s) is 0.05849679733356485
On row 0, SMAPE for the first 9 rescaled prediction(s) is 0.06538632402192844
On row 0, SMAPE for the first 10 rescaled prediction(s) is 0.07215555443961374
On row 0, SMAPE for the first 11 rescaled prediction(s) is 0.07713512845589214
On row 0, SMAPE for the first 12 rescaled prediction(s) is 0.07161625960453977
On row 0, SMAPE for the first 13 rescaled prediction(s) is 0.06

### detailed test for a single row/sub-series ends here...

### run inference process and build databases

In [250]:
# Run recursive (multi-step) inference with the latest exported model of each execution,
# then build the predictions detail dataframe (one row per starting row of the dataset)
# and the predictions summary dataframe (metrics aggregated over all rows).
# Names read from earlier cells: model_id, dataset, inference, PROJECT_ROOT, ts, stage, m, t,
# forecast_window, encoder_input_columns, decoder_input_columns, id_columns, min_max_scaler,
# smape, load and input_tensors_to_serialized_example.
# during batch prediction, the execution identifier is obtained via Abseil Flags
for execution in [4]:
    # a columns list for the predictions dataframe
    pred_df_columns = ['model_id',
                       'execution',
                       'dataset',
                       'inference',
                       'string_timestamps',
                       'predictions',
                       'targets']

    # scalar metrics stored per row, after the columns listed above
    metric_columns = ['mae', 'rmse', 'smape']

    # rows are collected as dictionaries and converted to a dataframe only once;
    # concatenating onto an empty dataframe on every iteration copies all previous rows each time
    detail_rows = []

    # use model identifier and execution number to build the model directory string
    model_dir = '{}_{:02d}'.format(model_id, execution)

    # get the path to the saved model main directory
    saved_model_path = '{}/{}/{}/export/exporter'.format(PROJECT_ROOT,
                                                         'models',
                                                         model_dir)

    # get all the entries in the saved model path, to find the most recent one
    all_files = os.listdir(saved_model_path)
    # keep only all-digit names (the form of the exported model id) and compare them as numbers,
    # so an unrelated entry cannot win a lexicographic sort
    # NOTE(review): presumably these ids are export timestamps and other entries are leftovers
    # such as temporary export directories - confirm
    saved_model_ids = [entry for entry in all_files if entry.isdigit()]
    if not saved_model_ids:
        raise FileNotFoundError('No exported model found in {}'.format(saved_model_path))
    latest_saved_model_id = max(saved_model_ids, key=int)

    # build the full path for the latest saved model dir
    export_dir = '{}/{}'.format(saved_model_path, latest_saved_model_id)
    print ('Exported model path is {}'.format(export_dir))

    # load the saved model and the prediction function
    imported = load(export_dir=export_dir, tags='serve')
    predict_fn = imported.signatures["serving_default"]

    # iterate on a set of valid rows of the test dataset
    starting_point = 0 # based on the inference dataset
    span = 1 + 6*24 # number of starting rows: six days expressed in hours, plus one
    dataset_row_indexes_list = starting_point + np.arange(span)

    for start_index in dataset_row_indexes_list:

        # define first prediction interval with start- and end-index
        # given the interval time_series[start_index:end_index]
        # the conditioning range is the union of the encoder-input and the decoder-input
        # and the prediction range is only the last reading in the interval,
        # by means of a recursive inference process
        # on each step the last prediction is added to the decoder input
        # and the prediction range grows one step into the future

        # get the end-index of this recursive inference interval
        end_index = start_index + (m + t)

        # initialize a list to store recurrent predictions for this interval
        predictions_list = list()

        for i in range(forecast_window):

            # build the inference interval as a sub-series of the dataset
            # (the window slides one step forward on every iteration)
            sub_series = ts[stage][start_index + i : end_index + i]

            # important: build sources as copies of the sub-series (and therefore of the global time series)
            # to avoid overwriting the original dataset

            # the encoder input source
            encoder_input = sub_series[encoder_input_columns][:m].copy()

            # the decoder input source
            decoder_input = sub_series[decoder_input_columns][m-1:m-1+t].copy()

            # the id (integer) for the customer
            id_input = sub_series[id_columns][:1].copy()

            # on first step (i=0), the decoder input carries only true values
            # and the predictions list is empty
            # on subsequent steps, the last i values of the decoder input
            # are replaced with all previous predictions (stored in the predictions list)
            if i > 0:
                # single positional assignment: chained indexing (df[col][-i:] = ...) does not
                # update the dataframe when pandas Copy-on-Write is active
                kw_position = decoder_input.columns.get_loc('kw_scaled')
                decoder_input.iloc[-i:, kw_position] = predictions_list

            # the target source, for metrics calculation
            # the first part of the sub-series is the encoder input, and
            # the second part of the sub-series is the target (only the variable column!)
            target = sub_series['kw_scaled'][m:].copy()

            # build source tensors from the sub-series (add a leading axis of size 1)
            encoder_input_tensor = tf.expand_dims(encoder_input, axis=0)
            decoder_input_tensor = tf.expand_dims(decoder_input, axis=0)
            id_tensor = tf.expand_dims(id_input, axis=0)

            # make input example for the prediction function
            input_example = input_tensors_to_serialized_example(encoder_input_tensor,
                                                                decoder_input_tensor,
                                                                id_tensor)

            # get the output of the prediction function as a dictionary
            predict_output_dict = predict_fn(input_example)

            # get the prediction output tensor
            predict_output_tensor = predict_output_dict['forecast']

            # get the most recent prediction (last timestep of the forecast)
            most_recent_prediction = predict_output_tensor[0, :, 0].numpy()[-1]

            # append the most recent prediction timestep to the predictions list
            predictions_list.append(most_recent_prediction)

        # iterative predictions over the forecast window reside in predictions_list
        n_predictions = len(predictions_list)

        # convert list to array, then expand feature dimension with value 1
        predicted_values = np.array(predictions_list).reshape(-1, 1)

        # inverse-scale predictions
        rescaled_predicted_values = min_max_scaler.inverse_transform(predicted_values)

        # the true values are the last n_predictions values of the target built on the final step;
        # pass them to a NumPy array and expand feature dimension with value 1
        true_values = np.array(target[-n_predictions:]).reshape(-1, 1)

        # inverse-scale true values
        rescaled_true_values = min_max_scaler.inverse_transform(true_values)

        # a record built from the data in the current row
        # np.ravel (instead of np.squeeze) keeps the values as lists even for a single-step forecast
        detail_rows.append({
            'model_id': model_id,
            'execution': execution,
            'dataset': dataset,
            'inference': inference,
            'string_timestamps': pd.to_datetime(target.index[-n_predictions:]).astype(str).tolist(),
            'predictions': np.ravel(rescaled_predicted_values).tolist(),
            'targets': np.ravel(rescaled_true_values).tolist(),
            'mae': mean_absolute_error(rescaled_true_values, rescaled_predicted_values),
            'rmse': sqrt(mean_squared_error(rescaled_true_values, rescaled_predicted_values)),
            'smape': smape(rescaled_true_values, rescaled_predicted_values),
        })

    # build the predictions detail dataframe once all of the rows (dataset) have been processed;
    # the resulting index is already 0, 1, ..., number of rows - 1
    predictions_detail_df = pd.DataFrame(detail_rows, columns=pred_df_columns + metric_columns)

    # build a predictions summary dataframe, reset index to avoid making a multi-column index when grouping by
    # only the scalar metric columns are averaged: the list-valued columns cannot be,
    # and pandas >= 2.0 raises instead of silently dropping them
    predictions_summary_df = predictions_detail_df.groupby(
        ['model_id', 'execution', 'dataset', 'inference']
    )[metric_columns].mean().reset_index()

    # a range to iterate on prediction timesteps
    targets_range = np.arange(forecast_window)

    # stack the per-row lists into (rows, forecast_window) arrays, so that
    # column n holds the n-th step-ahead values of every row in the dataset
    targets_matrix = np.array(predictions_detail_df['targets'].tolist())
    predictions_matrix = np.array(predictions_detail_df['predictions'].tolist())

    # vector metric (vector component to vector component)
    # an array forecast_window-d: metric for 1, 2,..., no_targets step-ahead
    # (target versus prediction for rows in dataset)
    mae_vector = [
        mean_absolute_error(targets_matrix[:, n], predictions_matrix[:, n])
        for n in targets_range
    ]
    predictions_summary_df['mae_vector'] = [mae_vector]

    rmse_vector = [
        sqrt(mean_squared_error(targets_matrix[:, n], predictions_matrix[:, n]))
        for n in targets_range
    ]
    predictions_summary_df['rmse_vector'] = [rmse_vector]

    smape_vector = [
        smape(targets_matrix[:, n], predictions_matrix[:, n])
        for n in targets_range
    ]
    predictions_summary_df['smape_vector'] = [smape_vector]

    # insert count of rows as a column value
    predictions_summary_df.insert(4, 'count', len(dataset_row_indexes_list))

    # build a path to persist the dataframe to database/predictions_detail/
    detail_pickle_path = '{}/{}/{}/{}_{:02d}_{}_{}.pkl'.format(
        PROJECT_ROOT,
        'database',
        'predictions_detail',
        model_id,
        execution,
        dataset,
        inference)

    # persist the Pandas dataframe to database/predictions_detail/
    # predictions_detail_df.to_pickle(detail_pickle_path)
    # print('Persisted Pandas dataframe for predictions detail of {}_{:02d}_{}_{}'.format(model_id,
    #                                                                                     execution,
    #                                                                                     dataset,
    #                                                                                     inference))

    # build a path to persist the dataframe to database/predictions_summary/
    summary_pickle_path = '{}/{}/{}/{}_{:02d}_{}_{}.pkl'.format(
        PROJECT_ROOT,
        'database',
        'predictions_summary',
        model_id,
        execution,
        dataset,
        inference)

    # persist the Pandas dataframe to database/predictions_summary/
    #  predictions_summary_df.to_pickle(summary_pickle_path)
    # print('Persisted Pandas dataframe for predictions summary of {}_{:02d}_{}_{}'.format(model_id,
    #                                                                                      execution,
    #                                                                                      dataset,
    #                                                                                      inference))


Exported model path is /home/developer/gcp/cbidmltsf/models/BSCTRFM_TPU_014_04/export/exporter/1631533140


In [280]:
# display the timestamps covered by the forecast of the last row in the detail dataframe;
# positional access avoids hard-coding the label of the last row (144 for 145 rows)
predictions_detail_df['string_timestamps'].iloc[-1]

['2014-09-07 00:00:00',
 '2014-09-07 01:00:00',
 '2014-09-07 02:00:00',
 '2014-09-07 03:00:00',
 '2014-09-07 04:00:00',
 '2014-09-07 05:00:00',
 '2014-09-07 06:00:00',
 '2014-09-07 07:00:00',
 '2014-09-07 08:00:00',
 '2014-09-07 09:00:00',
 '2014-09-07 10:00:00',
 '2014-09-07 11:00:00',
 '2014-09-07 12:00:00',
 '2014-09-07 13:00:00',
 '2014-09-07 14:00:00',
 '2014-09-07 15:00:00',
 '2014-09-07 16:00:00',
 '2014-09-07 17:00:00',
 '2014-09-07 18:00:00',
 '2014-09-07 19:00:00',
 '2014-09-07 20:00:00',
 '2014-09-07 21:00:00',
 '2014-09-07 22:00:00',
 '2014-09-07 23:00:00']

In [252]:
# display the predictions summary dataframe (aggregated metrics plus per-step metric vectors)
predictions_summary_df

Unnamed: 0,model_id,execution,dataset,inference,count,mae,rmse,smape,mae_vector,rmse_vector,smape_vector
0,BSCTRFM_TPU_014,4,test,24,145,10.218301,12.668499,0.088831,"[8.681233039474387, 9.68678785410691, 9.680264...","[11.177788279915541, 12.412924580878334, 12.40...","[0.0747142895756404, 0.0839992370331931, 0.083..."


In [30]:
# absolute path to the project root; saved-model and database paths in this notebook are built from it
PROJECT_ROOT = '/home/developer/gcp/cbidmltsf'

In [31]:
# identify the persisted predictions to inspect: model, execution number and dataset
model_id, execution, dataset = 'BSCTRFM_TPU_009', 0, 'test'
# number of forecast steps; its zero-padded, three-digit form is the inference identifier
forecast_window = 24
inference = format(forecast_window, '03d')

In [32]:
# path to the pickled predictions detail dataframe:
# <PROJECT_ROOT>/database/predictions_detail/<model_id>_<execution>_<dataset>_<inference>.pkl
detail_pickle_path = '/'.join([
    PROJECT_ROOT,
    'database',
    'predictions_detail',
    '{}_{:02d}_{}_{}.pkl'.format(model_id, execution, dataset, inference),
])
# display the resulting path
detail_pickle_path

'/home/developer/gcp/cbidmltsf/database/predictions_detail/BSCTRFM_TPU_009_00_test_024.pkl'

In [33]:
# load the predictions detail dataframe from the pickle file at detail_pickle_path
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source
predictions_detail_df = pd.read_pickle(detail_pickle_path)

In [34]:
# registry of Bokeh figures, keyed by plot label
plots = {}

In [35]:
# plot results for a given row
# (row is the index label of the predictions detail dataframe, used with .loc)
row = 0

In [36]:
# first and last timestamps (as strings) of the predictions stored in the selected row
row_timestamps = predictions_detail_df.loc[row]['string_timestamps']
start_timestamp, end_timestamp = row_timestamps[0], row_timestamps[-1]

In [37]:
# label the plot with the predictions start timestamp
# (the label is also the key of the figure in the plots dictionary)
label = start_timestamp

In [38]:
# Plot real versus predicted active power for the selected row of the predictions detail dataframe.

# fetch the row once instead of repeating the .loc lookup for every field
row_data = predictions_detail_df.loc[row]

# both lines share the same x values, so parse the timestamp strings only once
row_datetimes = pd.to_datetime(row_data['string_timestamps'])

# the title reports the forecast horizon, the start timestamp and the row metrics
plot_title = '{}-step-ahead active power predictions starting on {} with MAE={}, RMSE={}, SMAPE={}'.format(
    forecast_window,
    label,
    round(row_data['mae'], 6),
    round(row_data['rmse'], 6),
    round(row_data['smape'], 6))

plots[label] = figure(
    x_axis_type='datetime',
    # width/height replace plot_width/plot_height, which were deprecated in Bokeh 2.4
    # and removed in Bokeh 3.0
    width=960,
    height=400,
    title=plot_title
)

plots[label].grid.grid_line_alpha=0.5

plots[label].xaxis.axis_label = 'Timestamp'
plots[label].yaxis.axis_label = 'Active power [KW]'

# true values
plots[label].line(
    row_datetimes,
    row_data['targets'],
    color='green',
    legend_label='real')

# recursive predictions
plots[label].line(
    row_datetimes,
    row_data['predictions'],
    color='red',
    legend_label='predicted')

plots[label].legend.location = 'bottom_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(plots[label])

# uncomment the following line to display plot
show(plots[label])