## consolidate predictions detail and get global metrics for electricity dataset

In [1]:
import os
import json
import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
from bokeh.layouts import row, gridplot, layout
from bokeh.palettes import d3
from bokeh.models import Span

output_notebook()

In [3]:
# pd.set_option('display.max_rows', 200)

In [4]:
PROJECT_ROOT = '/home/developer/gcp/cbidmltsf'

In [5]:
# build a list with customer_ids
start, end = 1, 370

customer_ids = ['MT_{:03d}'.format(token_id) for token_id in np.arange(start, end + 1)]

In [6]:
len(customer_ids)

370

In [7]:
sldb_id = 'LD2011-2014_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'
sldb_id

'LD2011-2014_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [8]:
forecast_window = 24
inference = '{:03d}'.format(forecast_window)

In [9]:
# build a path to the SLDB json file
data_dir = '{}/{}/{}'.format(PROJECT_ROOT, 'sldbs', sldb_id)
data_dir

'/home/developer/gcp/cbidmltsf/sldbs/LD2011-2014_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [10]:
# then get the ts_identifier from the json file in the sldb directory
sldb_json_file = '{}/sldb.json'.format(data_dir)

In [11]:
# open the json file
with open(sldb_json_file, 'r') as inputfile:
    sldb_dict = json.load(inputfile)

In [12]:
ts_identifier = sldb_dict['ts']
m = sldb_dict['embedding']['hourly']
t = sldb_dict['no_targets']

In [13]:
ts_identifier, m, t

('LD2011-2014_SEPARATED_FULL', 168, 168)

## load all inference pickle files, then group results per saved model

In [14]:
pickle_path = '{}/inferences/electricity'.format(PROJECT_ROOT)

In [15]:
all_pickle_files = os.listdir(pickle_path)

In [16]:
all_pickle_files.sort()

In [17]:
len(all_pickle_files)

85

In [18]:
# pickle files to be excluded from metric analysis
excluded_pickle_files = [
    'BSCTRFM_TPU_055_01_1637865979_test_024_00.pkl',
    'BSCTRFM_TPU_055_01_1637865979_test_024_01.pkl',
    'BSCTRFM_TPU_055_01_1637865979_test_024_02.pkl',
    'BSCTRFM_TPU_055_01_1637865979_test_024_03.pkl',
    'BSCTRFM_TPU_055_04_1643310398_test_024_00.pkl',
    'BSCTRFM_TPU_055_05_1643310781_test_024_00.pkl',
    'BSCTRFM_TPU_055_06_1643311179_test_024_00.pkl',
    'BSCTRFM_TPU_057_00_1643339649_test_024_00.pkl',
    'BSCTRFM_TPU_057_00_1643339649_test_024_01.pkl',
    'BSCTRFM_TPU_057_01_1643340274_test_024_00.pkl',
    'BSCTRFM_TPU_057_01_1643340274_test_024_01.pkl',
    'BSCTRFM_TPU_058_00_1643341050_test_024_00.pkl',
    'BSCTRFM_TPU_058_00_1643341050_test_024_01.pkl',
    'BSCTRFM_TPU_058_01_1643341809_test_024_00.pkl',
    'BSCTRFM_TPU_058_01_1643341809_test_024_01.pkl',
    'BSCTRFM_TPU_059_00_1643342181_test_024_00.pkl',
    'BSCTRFM_TPU_059_00_1643342181_test_024_01.pkl',
    'BSCTRFM_TPU_059_01_1643342548_test_024_00.pkl',
    'BSCTRFM_TPU_059_01_1643342548_test_024_01.pkl',
    'BSCTRFM_TPU_060_00_1643342901_test_024_01.pkl',
    'BSCTRFM_TPU_060_01_1643343208_test_024_01.pkl',
    'BSCTRFM_TPU_061_00_1643391657_test_024_00.pkl',
    'BSCTRFM_TPU_061_01_1643392130_test_024_00.pkl',
    'BSCTRFM_TPU_061_02_1643392494_test_024_00.pkl',
    'BSCTRFM_TPU_061_03_1643392797_test_024_00.pkl',
    'BSCTRFM_TPU_062_00_1648751508_test_024_00.pkl',
    'BSCTRFM_TPU_062_01_1648751811_test_024_00.pkl',
    'BSCTRFM_TPU_062_02_1648752107_test_024_00.pkl',
    # 'BSCTRFM_TPU_062_03_1648752404_test_024_00.pkl',
    'BSCTRFM_TPU_063_00_1648501348_test_024_00.pkl',
    'BSCTRFM_TPU_063_01_1648501641_test_024_00.pkl',
    'BSCTRFM_TPU_063_02_1648510068_test_024_00.pkl',
    'BSCTRFM_TPU_063_03_1648510358_test_024_00.pkl',
    'BSCTRFM_TPU_063_04_1648510650_test_024_00.pkl',
    'BSCTRFM_TPU_063_05_1648510951_test_024_00.pkl',
    'BSCTRFM_TPU_063_06_1648511264_test_024_00.pkl',
    'BSCTRFM_TPU_063_07_1648511559_test_024_00.pkl',
    'BSCTRFM_TPU_063_08_1648511854_test_024_00.pkl',
    'BSCTRFM_TPU_063_09_1648512146_test_024_00.pkl',
    'BSCTRFM_TPU_065_00_1648580443_test_024_00.pkl',
    'BSCTRFM_TPU_065_01_1648580739_test_024_00.pkl',
    'BSCTRFM_TPU_065_02_1648581039_test_024_00.pkl',
    'BSCTRFM_TPU_065_03_1648580141_test_024_00.pkl',
    'BSCTRFM_TPU_065_04_1648581337_test_024_00.pkl',
    'BSCTRFM_TPU_065_05_1648581628_test_024_00.pkl',
    'BSCTRFM_TPU_065_06_1648581935_test_024_00.pkl',
]

In [19]:
# use only selected pickle_files
pickle_files = [file for file in all_pickle_files if file not in excluded_pickle_files]

In [20]:
len(pickle_files)

40

In [21]:
metrics_dataframe_columns = [
    'model_id',
    'execution',
    'saved_model_id',
    'dataset',
    'inference',
    'event',
    'nd',
    'nrmse'
]

In [22]:
metrics_dataframe = pd.DataFrame(columns=metrics_dataframe_columns)

for pickle_file in pickle_files:
       
    inferences_df = pd.read_pickle('{}/{}'.format(pickle_path, pickle_file))

    # buffer_df = pd.DataFrame(columns=metrics_dataframe_columns)
    
    model_id = pickle_file[:15]
    execution = int(pickle_file[16:18])
    saved_model_id = int(pickle_file[19:29])
    dataset = pickle_file[30:34]
    inference = pickle_file[35:38]
    event = int(pickle_file[39:41])     
    
    mae = mean_absolute_error(inferences_df['prediction'], inferences_df['target'])
    true_values_average = np.mean(inferences_df['target'])
    nd = mae/true_values_average
    rmse = sqrt(mean_squared_error(inferences_df['prediction'], inferences_df['target']))
    nrmse = rmse/true_values_average
    
    row_dict = {
        'model_id': model_id,
        'execution': execution,
        'saved_model_id': saved_model_id,
        'dataset': dataset,
        'inference': inference,
        'event': event,
        'nd': nd,
        'nrmse': nrmse
    }
    
    metrics_dataframe = metrics_dataframe.append(row_dict, ignore_index = True)

In [23]:
metrics_dataframe

Unnamed: 0,model_id,execution,saved_model_id,dataset,inference,event,nd,nrmse
0,BSCTRFM_TPU_060,0,1643342901,test,24,0,0.068724,0.581921
1,BSCTRFM_TPU_060,1,1643343208,test,24,0,0.067696,0.521816
2,BSCTRFM_TPU_060,2,1643388975,test,24,0,0.070347,0.5357
3,BSCTRFM_TPU_060,3,1643389253,test,24,0,0.068057,0.496785
4,BSCTRFM_TPU_060,4,1643389647,test,24,0,0.072669,0.561613
5,BSCTRFM_TPU_060,5,1643389923,test,24,0,0.075325,0.634866
6,BSCTRFM_TPU_060,6,1643390226,test,24,0,0.077632,0.593491
7,BSCTRFM_TPU_060,7,1643390534,test,24,0,0.071272,0.556367
8,BSCTRFM_TPU_060,8,1643390838,test,24,0,0.066514,0.502827
9,BSCTRFM_TPU_060,9,1643391143,test,24,0,0.066489,0.474466


In [24]:
metrics = dict()

# 128 at float32
batch_size = '128'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_066'].copy()

# 256 at float32
batch_size = '256'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_060'].copy()

# 512 at float32
batch_size = '512'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_062'].copy()

# 1024 at float32
batch_size = '1024'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_064'].copy()

In [27]:
metrics['128'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.072503,0.570393
std,0.004208,0.047781
min,0.066681,0.493467
25%,0.069225,0.537431
50%,0.071661,0.563293
75%,0.076635,0.596499
max,0.078204,0.650219


In [26]:
metrics['256'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.070472,0.545985
std,0.003777,0.049316
min,0.066489,0.474466
25%,0.067786,0.507574
50%,0.069536,0.546034
75%,0.072319,0.576844
max,0.077632,0.634866


In [28]:
metrics['512'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.072916,0.600318
std,0.005559,0.06907
min,0.064108,0.497414
25%,0.069811,0.550495
50%,0.073201,0.614509
75%,0.075831,0.637241
max,0.083217,0.730422


In [29]:
metrics['1024'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.073368,0.589168
std,0.005629,0.0569
min,0.065097,0.467314
25%,0.069171,0.56345
50%,0.073191,0.592497
75%,0.078248,0.617904
max,0.080903,0.672141


In [46]:
# initialize the statistics dictionary
stats = {
    'qmin': {'128': None, '256': None, '512': None, '1024': None},
    'q1': {'128': None, '256': None, '512': None, '1024': None},
    'q2': {'128': None, '256': None, '512': None, '1024': None},
    'q3': {'128': None, '256': None, '512': None, '1024': None},
    'qmax': {'128': None, '256': None, '512': None, '1024': None},
    'iqr': {'128': None, '256': None, '512': None, '1024': None},
    'upper': {'128': None, '256': None, '512': None, '1024': None},
    'lower': {'128': None, '256': None, '512': None, '1024': None},
}

In [47]:
batch_sizes = ['128', '256', '512', '1024']

In [48]:
for batch_size in batch_sizes:
    stats['qmin'][batch_size] = metrics[batch_size].quantile(q=0.)
    stats['q1'][batch_size] = metrics[batch_size].quantile(q=0.25)
    stats['q2'][batch_size] = metrics[batch_size].quantile(q=0.5)
    stats['q3'][batch_size] = metrics[batch_size].quantile(q=0.75)
    stats['qmax'][batch_size] = metrics[batch_size].quantile(q=1.)
    stats['iqr'][batch_size] = stats['q3'][batch_size] - stats['q1'][batch_size]
    stats['upper'][batch_size] = stats['q3'][batch_size] + 1.5*stats['iqr'][batch_size]
    stats['lower'][batch_size] = stats['q1'][batch_size] - 1.5*stats['iqr'][batch_size]

In [30]:
# are there outliers for ND or NRMSE (values outside the inter-quantile range)?

In [49]:
for batch_size in batch_sizes:
    num_outliers = np.sum((metrics[batch_size]['nd'] < stats['lower'][batch_size]['nd']) \
    | \
    (metrics[batch_size]['nd'] > stats['upper'][batch_size]['nd']))

    print('The number of outliers for ND at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for ND at batch size 128 is 0
The number of outliers for ND at batch size 256 is 0
The number of outliers for ND at batch size 512 is 0
The number of outliers for ND at batch size 1024 is 0


In [50]:
for batch_size in batch_sizes:
    num_outliers = np.sum((metrics[batch_size]['nrmse'] < stats['lower'][batch_size]['nrmse']) \
    | \
    (metrics[batch_size]['nrmse'] > stats['upper'][batch_size]['nrmse']))

    print('The number of outliers for NRMSE at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for NRMSE at batch size 128 is 0
The number of outliers for NRMSE at batch size 256 is 0
The number of outliers for NRMSE at batch size 512 is 0
The number of outliers for NRMSE at batch size 1024 is 1


In [51]:
#ToDo: plot the previous good outlier (best value for NRMSE overall)!!!

In [136]:
from bokeh.plotting import figure, show
from bokeh.layouts import row
output_notebook()

In [158]:
plots = dict()

In [182]:
plots['nd'] = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)

# plots['nd'].xaxis.axis_label = "Batch Size"
# plots['nd'].yaxis.axis_label = "ND"
plots['nd'].axis.axis_label_text_font_style = "bold"
plots['nd'].axis.axis_label_text_font_size = "12pt"

# vertical bars for quantile description
# from Q1 to mean
plots['nd'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q1'][batch_size]['nd'] for batch_size in batch_sizes],
    top=[stats['q2'][batch_size]['nd'] for batch_size in batch_sizes],
    fill_color="#3B8686",
    line_color="black")
# from mean to Q3
plots['nd'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q2'][batch_size]['nd'] for batch_size in batch_sizes],
    top=[stats['q3'][batch_size]['nd'] for batch_size in batch_sizes],
    fill_color="#E08E79",
    line_color="black")

# segments for inter-quantile description
# lower
plots['nd'].segment(
    x0=batch_sizes,
    y0=[stats['lower'][batch_size]['nd'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['q1'][batch_size]['nd'] for batch_size in batch_sizes],
    line_color='black')
# higher
plots['nd'].segment(
    x0=batch_sizes,
    y0=[stats['q3'][batch_size]['nd'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['upper'][batch_size]['nd'] for batch_size in batch_sizes],
    line_color='black')

# whiskers (almost-0 height rects simpler than segments)
# lower
plots['nd'].rect(
    x=batch_sizes,
    y=[stats['lower'][batch_size]['nd'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")
# higher
plots['nd'].rect(
    x=batch_sizes,
    y=[stats['upper'][batch_size]['nd'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")

In [183]:
plots['nrmse'] = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)

# plots['nrmse'].yaxis.axis_label = "NRMSE"
# plots['nrmse'].xaxis.axis_label = "Batch Size"
plots['nrmse'].axis.axis_label_text_font_style = "bold"
plots['nrmse'].axis.axis_label_text_font_size = "12pt"

# vertical bars for quantile description
# from Q1 to mean
plots['nrmse'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q1'][batch_size]['nrmse'] for batch_size in batch_sizes],
    top=[stats['q2'][batch_size]['nrmse'] for batch_size in batch_sizes],
    fill_color="#3B8686",
    line_color="black")
# from mean to Q3
plots['nrmse'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q2'][batch_size]['nrmse'] for batch_size in batch_sizes],
    top=[stats['q3'][batch_size]['nrmse'] for batch_size in batch_sizes],
    fill_color="#E08E79",
    line_color="black")

# segments for inter-quantile description
# lower
plots['nrmse'].segment(
    x0=batch_sizes,
    y0=[stats['lower'][batch_size]['nrmse'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['q1'][batch_size]['nrmse'] for batch_size in batch_sizes],
    line_color='black')
# higher
plots['nrmse'].segment(
    x0=batch_sizes,
    y0=[stats['q3'][batch_size]['nrmse'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['upper'][batch_size]['nrmse'] for batch_size in batch_sizes],
    line_color='black')

# whiskers (almost-0 height rects simpler than segments)
# lower
plots['nrmse'].rect(
    x=batch_sizes,
    y=[stats['lower'][batch_size]['nrmse'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")
# higher
plots['nrmse'].rect(
    x=batch_sizes,
    y=[stats['upper'][batch_size]['nrmse'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")

In [184]:
show(row(plots['nd'], plots['nrmse']))