## consolidate predictions detail and get global metrics for electricity dataset

In [22]:
import os
import json
import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [23]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
from bokeh.layouts import row, gridplot, layout
from bokeh.palettes import d3
from bokeh.models import Span
from bokeh.io import export_png

output_notebook()

In [24]:
# pd.set_option('display.max_rows', 200)

In [25]:
PROJECT_ROOT = '/home/developer/gcp/cbidmltsf'

In [26]:
# build a list with customer_ids
start, end = 1, 370

customer_ids = ['MT_{:03d}'.format(token_id) for token_id in np.arange(start, end + 1)]

In [27]:
len(customer_ids)

370

In [28]:
sldb_id = 'LD2011-2014_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'
sldb_id

'LD2011-2014_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [29]:
forecast_window = 24
inference = '{:03d}'.format(forecast_window)

In [30]:
# build a path to the SLDB json file
data_dir = '{}/{}/{}'.format(PROJECT_ROOT, 'sldbs', sldb_id)
data_dir

'/home/developer/gcp/cbidmltsf/sldbs/LD2011-2014_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [31]:
# then get the ts_identifier from the json file in the sldb directory
sldb_json_file = '{}/sldb.json'.format(data_dir)

In [32]:
# open the json file
with open(sldb_json_file, 'r') as inputfile:
    sldb_dict = json.load(inputfile)

In [33]:
ts_identifier = sldb_dict['ts']
m = sldb_dict['embedding']['hourly']
t = sldb_dict['no_targets']

In [34]:
ts_identifier, m, t

('LD2011-2014_SEPARATED_FULL', 168, 168)

## load all inference pickle files, then group results per saved model

In [35]:
pickle_path = '{}/inferences/electricity'.format(PROJECT_ROOT)

In [36]:
# manually get the required pickle files
pickle_files = [
    # 128 batch size
    'BSCTRFM_TPU_066_00_1648667550_test_168_00.pkl',
    'BSCTRFM_TPU_066_01_1648667853_test_168_00.pkl',
    'BSCTRFM_TPU_066_02_1648669390_test_168_00.pkl',
    'BSCTRFM_TPU_066_03_1648669692_test_168_00.pkl',
    'BSCTRFM_TPU_066_04_1648669991_test_168_00.pkl',
    'BSCTRFM_TPU_066_05_1648670290_test_168_00.pkl',
    'BSCTRFM_TPU_066_06_1648670606_test_168_00.pkl',
    'BSCTRFM_TPU_066_07_1648670907_test_168_00.pkl',
    'BSCTRFM_TPU_066_08_1648671234_test_168_00.pkl',
    'BSCTRFM_TPU_066_09_1648671534_test_168_00.pkl',
    # 256 batch size
    'BSCTRFM_TPU_060_00_1643342901_test_168_00.pkl',
    'BSCTRFM_TPU_060_01_1643343208_test_168_00.pkl',
    'BSCTRFM_TPU_060_02_1643388975_test_168_00.pkl',
    'BSCTRFM_TPU_060_03_1643389253_test_168_00.pkl',
    'BSCTRFM_TPU_060_04_1643389647_test_168_00.pkl',
    'BSCTRFM_TPU_060_05_1643389923_test_168_00.pkl',
    'BSCTRFM_TPU_060_06_1643390226_test_168_00.pkl',
    'BSCTRFM_TPU_060_07_1643390534_test_168_00.pkl',
    'BSCTRFM_TPU_060_08_1643390838_test_168_00.pkl',
    'BSCTRFM_TPU_060_09_1643391143_test_168_00.pkl',
    # 512 batch size
    'BSCTRFM_TPU_062_03_1648752404_test_168_00.pkl',
    'BSCTRFM_TPU_062_04_1648752704_test_168_00.pkl',
    'BSCTRFM_TPU_062_05_1648752998_test_168_00.pkl',
    'BSCTRFM_TPU_062_06_1648753288_test_168_00.pkl',
    'BSCTRFM_TPU_062_07_1648753586_test_168_00.pkl',
    'BSCTRFM_TPU_062_08_1648753880_test_168_00.pkl',
    'BSCTRFM_TPU_062_09_1648754190_test_168_00.pkl',
    'BSCTRFM_TPU_062_10_1648754489_test_168_00.pkl',
    'BSCTRFM_TPU_062_11_1648754784_test_168_00.pkl',
    'BSCTRFM_TPU_062_12_1648755099_test_168_00.pkl',
    # 1024 batch size
    'BSCTRFM_TPU_064_00_1648835854_test_168_00.pkl',
    'BSCTRFM_TPU_064_01_1648836125_test_168_00.pkl',
    'BSCTRFM_TPU_064_02_1648836416_test_168_00.pkl',
    'BSCTRFM_TPU_064_03_1648836709_test_168_00.pkl',
    'BSCTRFM_TPU_064_04_1648837002_test_168_00.pkl',
    'BSCTRFM_TPU_064_05_1648837294_test_168_00.pkl',
    'BSCTRFM_TPU_064_06_1648837593_test_168_00.pkl',
    'BSCTRFM_TPU_064_07_1648837890_test_168_00.pkl',
    'BSCTRFM_TPU_064_08_1648838192_test_168_00.pkl',
    'BSCTRFM_TPU_064_09_1648838498_test_168_00.pkl',
]

In [37]:
len(pickle_files)

40

In [38]:
metrics_dataframe_columns = [
    'model_id',
    'execution',
    'saved_model_id',
    'dataset',
    'inference',
    'event',
    'nd',
    'nrmse'
]

In [39]:
metrics_dataframe = pd.DataFrame(columns=metrics_dataframe_columns)

for pickle_file in pickle_files:
       
    inferences_df = pd.read_pickle('{}/{}'.format(pickle_path, pickle_file))

    # buffer_df = pd.DataFrame(columns=metrics_dataframe_columns)
    
    model_id = pickle_file[:15]
    execution = int(pickle_file[16:18])
    saved_model_id = int(pickle_file[19:29])
    dataset = pickle_file[30:34]
    inference = pickle_file[35:38]
    event = int(pickle_file[39:41])     
    
    mae = mean_absolute_error(inferences_df['prediction'], inferences_df['target'])
    true_values_average = np.mean(inferences_df['target'])
    nd = mae/true_values_average
    rmse = sqrt(mean_squared_error(inferences_df['prediction'], inferences_df['target']))
    nrmse = rmse/true_values_average
    
    row_dict = {
        'model_id': model_id,
        'execution': execution,
        'saved_model_id': saved_model_id,
        'dataset': dataset,
        'inference': inference,
        'event': event,
        'nd': nd,
        'nrmse': nrmse
    }
    
    metrics_dataframe = metrics_dataframe.append(row_dict, ignore_index = True)

In [40]:
metrics_dataframe

Unnamed: 0,model_id,execution,saved_model_id,dataset,inference,event,nd,nrmse
0,BSCTRFM_TPU_066,0,1648667550,test,168,0,0.075765,0.603091
1,BSCTRFM_TPU_066,1,1648667853,test,168,0,0.082431,0.57562
2,BSCTRFM_TPU_066,2,1648669390,test,168,0,0.095633,0.810085
3,BSCTRFM_TPU_066,3,1648669692,test,168,0,0.079297,0.607872
4,BSCTRFM_TPU_066,4,1648669991,test,168,0,0.079695,0.580218
5,BSCTRFM_TPU_066,5,1648670290,test,168,0,0.087047,0.688569
6,BSCTRFM_TPU_066,6,1648670606,test,168,0,0.091994,0.674767
7,BSCTRFM_TPU_066,7,1648670907,test,168,0,0.0806,0.573557
8,BSCTRFM_TPU_066,8,1648671234,test,168,0,0.08546,0.623228
9,BSCTRFM_TPU_066,9,1648671534,test,168,0,0.092733,0.724785


In [41]:
metrics = dict()

# 128 at float32
batch_size = '128'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_066'].copy()

# 256 at float32
batch_size = '256'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_060'].copy()

# 512 at float32
batch_size = '512'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_062'].copy()

# 1024 at float32
batch_size = '1024'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_064'].copy()

In [42]:
metrics['128'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.085065,0.646179
std,0.00665,0.07754
min,0.075765,0.573557
25%,0.079921,0.585936
50%,0.083945,0.61555
75%,0.090757,0.685118
max,0.095633,0.810085


In [43]:
metrics['256'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.082532,0.615509
std,0.005629,0.058946
min,0.07643,0.546181
25%,0.078726,0.577242
50%,0.081541,0.60829
75%,0.084142,0.646459
max,0.095132,0.740447


In [44]:
metrics['512'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.085185,0.696161
std,0.007984,0.072378
min,0.071836,0.564659
25%,0.080593,0.65725
50%,0.086298,0.695295
75%,0.089705,0.719849
max,0.099778,0.836185


In [45]:
metrics['1024'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.089175,0.694878
std,0.011171,0.098819
min,0.076041,0.507665
25%,0.078583,0.638777
50%,0.086747,0.699783
75%,0.100164,0.733371
max,0.10389,0.861092


In [46]:
# initialize the statistics dictionary
stats = {
    'qmin': {'128': None, '256': None, '512': None, '1024': None},
    'q1': {'128': None, '256': None, '512': None, '1024': None},
    'q2': {'128': None, '256': None, '512': None, '1024': None},
    'q3': {'128': None, '256': None, '512': None, '1024': None},
    'qmax': {'128': None, '256': None, '512': None, '1024': None},
    'iqr': {'128': None, '256': None, '512': None, '1024': None},
    'upper': {'128': None, '256': None, '512': None, '1024': None},
    'lower': {'128': None, '256': None, '512': None, '1024': None},
}

In [47]:
batch_sizes = ['128', '256', '512', '1024']

In [48]:
for batch_size in batch_sizes:
    stats['qmin'][batch_size] = metrics[batch_size].quantile(q=0.)
    stats['q1'][batch_size] = metrics[batch_size].quantile(q=0.25)
    stats['q2'][batch_size] = metrics[batch_size].quantile(q=0.5)
    stats['q3'][batch_size] = metrics[batch_size].quantile(q=0.75)
    stats['qmax'][batch_size] = metrics[batch_size].quantile(q=1.)
    stats['iqr'][batch_size] = stats['q3'][batch_size] - stats['q1'][batch_size]
    stats['upper'][batch_size] = stats['q3'][batch_size] + 1.5*stats['iqr'][batch_size]
    stats['lower'][batch_size] = stats['q1'][batch_size] - 1.5*stats['iqr'][batch_size]

In [49]:
# are there outliers for ND or NRMSE (values outside the inter-quantile range)?

In [50]:
for batch_size in batch_sizes:
    num_outliers = np.sum((metrics[batch_size]['nd'] < stats['lower'][batch_size]['nd']) \
    | \
    (metrics[batch_size]['nd'] > stats['upper'][batch_size]['nd']))

    print('The number of outliers for ND at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for ND at batch size 128 is 0
The number of outliers for ND at batch size 256 is 1
The number of outliers for ND at batch size 512 is 0
The number of outliers for ND at batch size 1024 is 0


In [51]:
for batch_size in batch_sizes:
    num_outliers = np.sum((metrics[batch_size]['nrmse'] < stats['lower'][batch_size]['nrmse']) \
    | \
    (metrics[batch_size]['nrmse'] > stats['upper'][batch_size]['nrmse']))

    print('The number of outliers for NRMSE at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for NRMSE at batch size 128 is 0
The number of outliers for NRMSE at batch size 256 is 0
The number of outliers for NRMSE at batch size 512 is 1
The number of outliers for NRMSE at batch size 1024 is 0


In [52]:
#ToDo: plot the previous good outlier (best value for NRMSE overall)!!!

In [53]:
plots = dict()

In [54]:
plots['nd'] = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)

# no background colors for exporting the image
plots['nd'].background_fill_color = None
plots['nd'].border_fill_color = None

# plots['nd'].xaxis.axis_label = "Batch Size"
# plots['nd'].yaxis.axis_label = "ND"
plots['nd'].axis.axis_label_text_font_style = "bold"
plots['nd'].axis.axis_label_text_font_size = "12pt"

# vertical bars for quantile description
# from Q1 to mean
plots['nd'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q1'][batch_size]['nd'] for batch_size in batch_sizes],
    top=[stats['q2'][batch_size]['nd'] for batch_size in batch_sizes],
    fill_color="#3B8686",
    line_color="black")
# from mean to Q3
plots['nd'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q2'][batch_size]['nd'] for batch_size in batch_sizes],
    top=[stats['q3'][batch_size]['nd'] for batch_size in batch_sizes],
    fill_color="#E08E79",
    line_color="black")

# segments for inter-quantile description
# lower
plots['nd'].segment(
    x0=batch_sizes,
    y0=[stats['lower'][batch_size]['nd'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['q1'][batch_size]['nd'] for batch_size in batch_sizes],
    line_color='black')
# higher
plots['nd'].segment(
    x0=batch_sizes,
    y0=[stats['q3'][batch_size]['nd'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['upper'][batch_size]['nd'] for batch_size in batch_sizes],
    line_color='black')

# whiskers (almost-0 height rects simpler than segments)
# lower
plots['nd'].rect(
    x=batch_sizes,
    y=[stats['lower'][batch_size]['nd'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")
# higher
plots['nd'].rect(
    x=batch_sizes,
    y=[stats['upper'][batch_size]['nd'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")

In [56]:
plots['nrmse'] = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)

# plots['nrmse'].yaxis.axis_label = "NRMSE"
# plots['nrmse'].xaxis.axis_label = "Batch Size"
plots['nrmse'].axis.axis_label_text_font_style = "bold"
plots['nrmse'].axis.axis_label_text_font_size = "12pt"

# vertical bars for quantile description
# from Q1 to mean
plots['nrmse'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q1'][batch_size]['nrmse'] for batch_size in batch_sizes],
    top=[stats['q2'][batch_size]['nrmse'] for batch_size in batch_sizes],
    fill_color="#3B8686",
    line_color="black")
# from mean to Q3
plots['nrmse'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q2'][batch_size]['nrmse'] for batch_size in batch_sizes],
    top=[stats['q3'][batch_size]['nrmse'] for batch_size in batch_sizes],
    fill_color="#E08E79",
    line_color="black")

# segments for inter-quantile description
# lower
plots['nrmse'].segment(
    x0=batch_sizes,
    y0=[stats['lower'][batch_size]['nrmse'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['q1'][batch_size]['nrmse'] for batch_size in batch_sizes],
    line_color='black')
# higher
plots['nrmse'].segment(
    x0=batch_sizes,
    y0=[stats['q3'][batch_size]['nrmse'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['upper'][batch_size]['nrmse'] for batch_size in batch_sizes],
    line_color='black')

# whiskers (almost-0 height rects simpler than segments)
# lower
plots['nrmse'].rect(
    x=batch_sizes,
    y=[stats['lower'][batch_size]['nrmse'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")
# higher
plots['nrmse'].rect(
    x=batch_sizes,
    y=[stats['upper'][batch_size]['nrmse'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")

In [57]:
show(row(plots['nd'], plots['nrmse']))