In [1]:
import os

import numpy as np

In [2]:
import pandas as pd

In [3]:
# Bokeh plotting entry points and output helpers
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
# d3 palettes; Category10 is used below to color the plotted lines
from bokeh.palettes import d3
# configure Bokeh to display figures in the notebook output cells
output_notebook()

In [4]:
# registry of Bokeh figures, keyed by plot label
plots = {}

In [5]:
# Root directory of the project; the prediction pickles are read from
# '<PROJECT_ROOT>/database/predictions_detail/'.
# The default is the original development machine path; set the
# CBIDMLTSF_PROJECT_ROOT environment variable (before starting the kernel)
# to run the notebook from a different checkout location.
PROJECT_ROOT = os.environ.get(
    'CBIDMLTSF_PROJECT_ROOT',
    '/home/developer/gcp/cbidmltsf',
)

In [109]:
# Models to compare. Each identifier is '<model_id>_<dataset>_<window>';
# the cells below strip the last 9 characters to recover the model id.
identifiers = [
    'BSCTRFM_TPU_010_test_024',
    'DMSLSTM_TPU_006_test_024',
    'EDSLSTM_TPU_013_test_024',
]

In [110]:
# Half-open range [start, end) of prediction rows to analyze;
# 168 rows = 7 * 24, matching the one-week span named in the summary plot title.
start = 0
end = 168
rows = np.arange(start, end)

In [111]:
# Accumulators used to compute prediction statistics across executions.
# Both are filled as {label: {execution: values}} by the plotting cells below.
future_timestamps = {}
future_smape_values = {}

In [112]:
# SMAPE per forecast window, one line per execution, for identifiers[0].
dataset = 'test'
forecast_window = 24
# drop the trailing '_test_024' (9 characters) to keep only the model id
model_id = identifiers[0][:-9]
inference = '{:03d}'.format(forecast_window)
label = '{}_{}_{}'.format(model_id, dataset, inference)

fig = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=400,
    title='{}-step-ahead for {}'.format(forecast_window, model_id),
)
fig.grid.grid_line_alpha = 0.5
fig.xaxis.axis_label = 'Starting timestamp of the forecast window'
fig.yaxis.axis_label = 'SMAPE for predictions over the forecast window'
plots[label] = fig

# per-execution results are kept for the cross-execution statistics computed later
future_timestamps[label] = {}
future_smape_values[label] = {}

palette = d3['Category10'][10]

for execution in range(10):

    # location of the predictions detail dataframe for this execution
    detail_pickle_path = '{}/{}/{}/{}_{:02d}_{}_{}.pkl'.format(
        PROJECT_ROOT, 'database', 'predictions_detail',
        model_id, execution, dataset, inference)

    predictions_detail_df = pd.read_pickle(detail_pickle_path)

    # for every selected row keep the first timestamp of its window and its SMAPE
    window_starts = []
    smape_values = []
    for row in rows:
        record = predictions_detail_df.loc[row]
        window_starts.append(record['string_timestamps'][0])
        smape_values.append(record['smape'])

    # the whole list of timestamp strings is converted in a single call here
    future_timestamps[label][execution] = pd.to_datetime(window_starts)
    future_smape_values[label][execution] = smape_values

    fig.line(
        future_timestamps[label][execution],
        future_smape_values[label][execution],
        color=palette[execution],
        legend_label='{:02d}'.format(execution),
    )


show(plots[label])

In [113]:
# SMAPE per forecast window, one line per execution, for identifiers[1].
dataset = 'test'
forecast_window = 24
# drop the trailing '_test_024' (9 characters) to keep only the model id
model_id = identifiers[1][:-9]
inference = '{:03d}'.format(forecast_window)
label = '{}_{}_{}'.format(model_id, dataset, inference)

fig = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=400,
    title='{}-step-ahead for {}'.format(forecast_window, model_id),
)
fig.grid.grid_line_alpha = 0.5
fig.xaxis.axis_label = 'Starting timestamp of the forecast window'
fig.yaxis.axis_label = 'SMAPE for predictions over the forecast window'
plots[label] = fig

# per-execution results are kept for the cross-execution statistics computed later
future_timestamps[label] = {}
future_smape_values[label] = {}

palette = d3['Category10'][10]

for execution in range(10):

    # location of the predictions detail dataframe for this execution
    detail_pickle_path = '{}/{}/{}/{}_{:02d}_{}_{}.pkl'.format(
        PROJECT_ROOT, 'database', 'predictions_detail',
        model_id, execution, dataset, inference)

    predictions_detail_df = pd.read_pickle(detail_pickle_path)

    # Original note: BSCTRFM string timestamps are lists, DMSLSTM string
    # timestamps are NumPy arrays; hence each first timestamp is converted
    # on its own, yielding a plain list of timestamps.
    window_starts = []
    smape_values = []
    for row in rows:
        record = predictions_detail_df.loc[row]
        window_starts.append(pd.to_datetime(record['string_timestamps'][0]))
        smape_values.append(record['smape'])

    future_timestamps[label][execution] = window_starts
    future_smape_values[label][execution] = smape_values

    fig.line(
        future_timestamps[label][execution],
        future_smape_values[label][execution],
        color=palette[execution],
        legend_label='{:02d}'.format(execution),
    )


show(plots[label])

In [114]:
# SMAPE per forecast window, one line per execution, for identifiers[2].
dataset = 'test'
forecast_window = 24
# drop the trailing '_test_024' (9 characters) to keep only the model id
model_id = identifiers[2][:-9]
inference = '{:03d}'.format(forecast_window)
label = '{}_{}_{}'.format(model_id, dataset, inference)

fig = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=400,
    title='{}-step-ahead for {}'.format(forecast_window, model_id),
)
fig.grid.grid_line_alpha = 0.5
fig.xaxis.axis_label = 'Starting timestamp of the forecast window'
fig.yaxis.axis_label = 'SMAPE for predictions over the forecast window'
plots[label] = fig

# per-execution results are kept for the cross-execution statistics computed later
future_timestamps[label] = {}
future_smape_values[label] = {}

palette = d3['Category10'][10]

for execution in range(10):

    # location of the predictions detail dataframe for this execution
    detail_pickle_path = '{}/{}/{}/{}_{:02d}_{}_{}.pkl'.format(
        PROJECT_ROOT, 'database', 'predictions_detail',
        model_id, execution, dataset, inference)

    predictions_detail_df = pd.read_pickle(detail_pickle_path)

    # Original note: BSCTRFM string timestamps are lists, DMSLSTM string
    # timestamps are NumPy arrays (presumably this model's are arrays as
    # well -- TODO confirm); hence each first timestamp is converted on its
    # own, yielding a plain list of timestamps.
    window_starts = []
    smape_values = []
    for row in rows:
        record = predictions_detail_df.loc[row]
        window_starts.append(pd.to_datetime(record['string_timestamps'][0]))
        smape_values.append(record['smape'])

    future_timestamps[label][execution] = window_starts
    future_smape_values[label][execution] = smape_values

    fig.line(
        future_timestamps[label][execution],
        future_smape_values[label][execution],
        color=palette[execution],
        legend_label='{:02d}'.format(execution),
    )


show(plots[label])

In [115]:
# Convert the per-execution dictionaries into DataFrames, for readability and to compute statistics

In [116]:
# one summary DataFrame per model identifier
data = {}

In [117]:
# Build one DataFrame per identifier: rows are forecast windows (indexed by
# their starting timestamp), columns are the executions, plus the row-wise
# 'mean' and 'std' across executions.
for identifier in identifiers:
    # one column per execution
    smape_values_df = pd.DataFrame.from_dict(future_smape_values[identifier])

    # the timestamps of execution 0 are used as the index for all executions
    timestamps_df = pd.DataFrame.from_dict(future_timestamps[identifier][0])
    timestamps_df = timestamps_df.rename(columns={0: 'timestamp'})

    per_execution_df = pd.concat(
        [timestamps_df, smape_values_df], axis=1
    ).set_index('timestamp')

    # Both statistics are computed from the execution columns only. Adding
    # 'mean' first and then calling std() on the whole frame would count the
    # row mean as an extra observation and bias the standard deviation.
    data[identifier] = per_execution_df.assign(
        mean=per_execution_df.mean(axis=1),
        std=per_execution_df.std(axis=1),
    )

In [119]:
# Compare the models: one curve per identifier with the SMAPE averaged
# across executions for every forecast window.
label = 'general'

# The x axis is the integer timestep index (not a datetime axis).
plots[label] = figure(
    plot_width=960,
    plot_height=400,
    title='Hourly-day-ahead SMAPE values for one week (average across 10 executions)'
)

plots[label].grid.grid_line_alpha = 0.5

plots[label].xaxis.axis_label = 'Timesteps'
plots[label].yaxis.axis_label = 'Average SMAPE for predictions over the forecast window'

# The 10-color palette starts with the same colors as the 3-color one, and
# cycling through it avoids an IndexError when more models are compared.
palette = d3['Category10'][10]

for line_color, identifier in enumerate(identifiers):
    mean_curve = data[identifier]['mean']

    plots[label].line(
        # size x from the curve itself so both always have the same length,
        # also when the analyzed row range does not start at 0
        np.arange(len(mean_curve)),
        mean_curve,
        color=palette[line_color % len(palette)],
        legend_label='{}:{}/{}'.format(
            # get the model architecture from the identifier
            identifier[:-9],
            # get the mean of the curve
            round(mean_curve.mean(), 6),
            # get the mean of the per-window std across executions
            round(data[identifier]['std'].mean(), 6))
    )

# the legend only needs to be positioned once, after all lines were added
plots[label].legend.location = 'top_left'

show(plots[label])