## Consolidate prediction details and compute global metrics

In [1]:
import os
import json
import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
from bokeh.layouts import row, gridplot, layout
from bokeh.palettes import d3
from bokeh.models import Span

# direct Bokeh output to the notebook, so figures passed to show() render inline
output_notebook()

In [3]:
# pd.set_option('display.max_rows', 200)

In [4]:
# Root directory of the project checkout. Every other path in this notebook is
# derived from it. The CBIDMLTSF_ROOT environment variable overrides the
# default, so the notebook can run on another machine without editing this cell.
PROJECT_ROOT = os.environ.get('CBIDMLTSF_ROOT', '/home/developer/gcp/cbidmltsf')

In [5]:
# folder with the raw PEMS-SF traffic files, inside the project tree
data_folder = '/'.join((PROJECT_ROOT, 'datasets', 'traffic', 'PEMS-SF'))
data_folder

'/home/developer/gcp/cbidmltsf/datasets/traffic/PEMS-SF'

In [6]:
# build the station_ids list

In [7]:
def process_list(s, variable_type=int, delimiter=None):
    """Convert one bracketed PEMS-format line into a list of typed values.

    Every '[' and ']' is removed from ``s``; the remainder is split on
    ``delimiter`` (on runs of whitespace when ``delimiter`` is None) and each
    token is converted with ``variable_type``.
    """
    unbracketed = s.replace('[', '').replace(']', '')
    # str.split(None) behaves exactly like str.split(): whitespace-run splitting
    tokens = unbracketed.split(delimiter)
    return list(map(variable_type, tokens))

In [8]:
def read_single_list(filename):
    """Read the first line of a PEMS-custom file as a list.

    ``filename`` is resolved relative to the module-level ``data_folder``.
    The first line is parsed by ``process_list`` with its default arguments.
    """
    source_path = os.path.join(data_folder, filename)
    with open(source_path, 'r') as handle:
        first_line = handle.readlines()[0]
        parsed = process_list(first_line)
    return parsed

In [9]:
# station identifiers, as parsed from the first line of the 'stations_list' file
station_ids = list(read_single_list('stations_list'))
len(station_ids)

963

In [10]:
# identifier of the SLDB under evaluation; it is also the name of the
# directory looked up under <PROJECT_ROOT>/sldbs a few cells below
sldb_id = 'PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'
sldb_id

'PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [11]:
# forecast window, and the same value as a zero-padded, three-character label
forecast_window = 168
inference = format(forecast_window, '03d')

In [12]:
# directory of the selected SLDB (it contains the sldb.json description file)
data_dir = '/'.join((PROJECT_ROOT, 'sldbs', sldb_id))
data_dir

'/home/developer/gcp/cbidmltsf/sldbs/PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [13]:
# path of the JSON description stored in the SLDB directory;
# the time-series identifier is read from it below
sldb_json_file = data_dir + '/sldb.json'

In [14]:
# parse the SLDB description file into a dictionary
with open(sldb_json_file, 'r') as inputfile:
    sldb_text = inputfile.read()
    sldb_dict = json.loads(sldb_text)

In [15]:
# values read from the SLDB description
ts_identifier = sldb_dict['ts']  # identifier of the time series
m = sldb_dict['embedding']['hourly']  # 'hourly' entry of the embedding settings
t = sldb_dict['no_targets']  # presumably the number of target steps - TODO confirm

In [16]:
# show the three values just read from the SLDB description
ts_identifier, m, t

('PEMS-SF_SEPARATED_FULL', 168, 168)

## load all inference pickle files, then group results per saved model

In [17]:
# folder that holds the inference pickle files for the traffic dataset
pickle_path = '/'.join((PROJECT_ROOT, 'inferences', 'traffic'))
pickle_path

'/home/developer/gcp/cbidmltsf/inferences/traffic'

In [40]:
# Inference pickle files to evaluate, listed by hand.
# File-name layout, as parsed by the metrics loop further down:
#   <model_id>_<execution>_<saved_model_id>_<dataset>_<inference>_<event>.pkl
# Ten executions (00-09) are listed for each of four model ids; the grouping
# cell below maps each model id to a batch size.
pickle_files = [
    # BSCTRFM_TPU_073 (grouped below under batch size 128)
    'BSCTRFM_TPU_073_00_1649257741_test_168_00.pkl',
    'BSCTRFM_TPU_073_01_1649344388_test_168_00.pkl',
    'BSCTRFM_TPU_073_02_1649344771_test_168_00.pkl',
    'BSCTRFM_TPU_073_03_1649345163_test_168_00.pkl',
    'BSCTRFM_TPU_073_04_1649345552_test_168_00.pkl',
    'BSCTRFM_TPU_073_05_1649345937_test_168_00.pkl',
    'BSCTRFM_TPU_073_06_1649346340_test_168_00.pkl',
    'BSCTRFM_TPU_073_07_1649346725_test_168_00.pkl',
    'BSCTRFM_TPU_073_08_1649347143_test_168_00.pkl',
    'BSCTRFM_TPU_073_09_1649347597_test_168_00.pkl',
    # BSCTRFM_TPU_075 (grouped below under batch size 256)
    'BSCTRFM_TPU_075_00_1637777357_test_168_00.pkl',
    'BSCTRFM_TPU_075_01_1637777674_test_168_00.pkl',
    'BSCTRFM_TPU_075_02_1643728821_test_168_00.pkl',
    'BSCTRFM_TPU_075_03_1643729170_test_168_00.pkl',
    'BSCTRFM_TPU_075_04_1643729516_test_168_00.pkl',
    'BSCTRFM_TPU_075_05_1643729861_test_168_00.pkl',
    'BSCTRFM_TPU_075_06_1643730222_test_168_00.pkl',
    'BSCTRFM_TPU_075_07_1643730588_test_168_00.pkl',
    'BSCTRFM_TPU_075_08_1643730931_test_168_00.pkl',
    'BSCTRFM_TPU_075_09_1643731285_test_168_00.pkl',
    # BSCTRFM_TPU_077 (grouped below under batch size 512)
    'BSCTRFM_TPU_077_00_1649347972_test_168_00.pkl',
    'BSCTRFM_TPU_077_01_1649348340_test_168_00.pkl',
    'BSCTRFM_TPU_077_02_1649348824_test_168_00.pkl',
    'BSCTRFM_TPU_077_03_1649350867_test_168_00.pkl',
    'BSCTRFM_TPU_077_04_1649351220_test_168_00.pkl',
    'BSCTRFM_TPU_077_05_1649351571_test_168_00.pkl',
    'BSCTRFM_TPU_077_06_1649351931_test_168_00.pkl',
    'BSCTRFM_TPU_077_07_1649357647_test_168_00.pkl',
    'BSCTRFM_TPU_077_08_1649358488_test_168_00.pkl',
    'BSCTRFM_TPU_077_09_1649358841_test_168_00.pkl',
    # BSCTRFM_TPU_079 (grouped below under batch size 1024)
    'BSCTRFM_TPU_079_00_1649359208_test_168_00.pkl',
    'BSCTRFM_TPU_079_01_1649359627_test_168_00.pkl',
    'BSCTRFM_TPU_079_02_1649431267_test_168_00.pkl',
    'BSCTRFM_TPU_079_03_1649434658_test_168_00.pkl',
    'BSCTRFM_TPU_079_04_1649435015_test_168_00.pkl',
    'BSCTRFM_TPU_079_05_1649435365_test_168_00.pkl',
    'BSCTRFM_TPU_079_06_1649435721_test_168_00.pkl',
    'BSCTRFM_TPU_079_07_1649436072_test_168_00.pkl',
    'BSCTRFM_TPU_079_08_1649436430_test_168_00.pkl',
    'BSCTRFM_TPU_079_09_1649436784_test_168_00.pkl',
]

In [41]:
# sanity check: number of pickle files listed above
len(pickle_files)

40

In [42]:
# Columns of the metrics table, one row per inference pickle: six fields taken
# from the file name, followed by the two error metrics computed from its rows.
metrics_dataframe_columns = (
    ['model_id', 'execution', 'saved_model_id', 'dataset', 'inference', 'event']
    + ['nd', 'nrmse']
)

In [43]:
def _parse_inference_filename(filename):
    """Split an inference pickle file name into its identifying fields.

    Expected layout:
    ``<model_id>_<execution>_<saved_model_id>_<dataset>_<inference>_<event>.pkl``.
    The five trailing fields are split off from the right, so ``model_id`` may
    contain underscores and may have any length (no fixed character offsets).

    Returns:
        dict with keys 'model_id', 'dataset', 'inference' (str) and
        'execution', 'saved_model_id', 'event' (int).

    Raises:
        ValueError: if the name has fewer than six underscore-separated fields
            or one of the numeric fields is not an integer.
    """
    stem = os.path.splitext(filename)[0]
    model_id, execution, saved_model_id, dataset, inference_id, event = stem.rsplit('_', 5)
    return {
        'model_id': model_id,
        'execution': int(execution),
        'saved_model_id': int(saved_model_id),
        'dataset': dataset,
        'inference': inference_id,
        'event': int(event),
    }


def _normalized_errors(targets, predictions):
    """Return ``(nd, nrmse)``: MAE and RMSE, each divided by the mean target."""
    mean_target = np.mean(targets)
    # scikit-learn's signature is (y_true, y_pred); both metrics are symmetric,
    # so the values are the same as with the arguments swapped
    mae = mean_absolute_error(targets, predictions)
    rmse = sqrt(mean_squared_error(targets, predictions))
    return mae / mean_target, rmse / mean_target


# Collect one record per pickle file and build the table once at the end:
# growing a DataFrame row by row with DataFrame.append copies the whole frame
# on every iteration, and the method no longer exists in pandas >= 2.0.
metric_records = []

for pickle_file in pickle_files:

    # NOTE(security): unpickling can execute arbitrary code; only load
    # inference files that come from a trusted source.
    inferences_df = pd.read_pickle('{}/{}'.format(pickle_path, pickle_file))

    row_dict = _parse_inference_filename(pickle_file)
    row_dict['nd'], row_dict['nrmse'] = _normalized_errors(
        inferences_df['target'], inferences_df['prediction'])

    metric_records.append(row_dict)

# The integer identifier columns are stored with object dtype, as the previous
# append-based construction left them (the recorded describe() outputs list
# only nd and nrmse). They are identifiers, not quantities, so numeric
# summaries downstream keep covering only the two error metrics.
metrics_dataframe = pd.DataFrame(
    metric_records, columns=metrics_dataframe_columns
).astype({'execution': object, 'saved_model_id': object, 'event': object})

In [44]:
# display the metrics table (one row per inference pickle file)
metrics_dataframe

Unnamed: 0,model_id,execution,saved_model_id,dataset,inference,event,nd,nrmse
0,BSCTRFM_TPU_073,0,1649257741,test,168,0,0.212242,0.452723
1,BSCTRFM_TPU_073,1,1649344388,test,168,0,0.211509,0.457091
2,BSCTRFM_TPU_073,2,1649344771,test,168,0,0.199199,0.423535
3,BSCTRFM_TPU_073,3,1649345163,test,168,0,0.201927,0.445268
4,BSCTRFM_TPU_073,4,1649345552,test,168,0,0.300812,0.519167
5,BSCTRFM_TPU_073,5,1649345937,test,168,0,0.180571,0.433262
6,BSCTRFM_TPU_073,6,1649346340,test,168,0,0.228002,0.450211
7,BSCTRFM_TPU_073,7,1649346725,test,168,0,0.19424,0.460167
8,BSCTRFM_TPU_073,8,1649347143,test,168,0,0.281461,0.52683
9,BSCTRFM_TPU_073,9,1649347597,test,168,0,0.161356,0.420486


In [45]:
# Split the metrics table by model id. Each model id corresponds to one batch
# size (all at float32); the batch size, as a string, is the dictionary key.
metrics = dict()

for batch_size, model_id_for_batch in (
        ('128', 'BSCTRFM_TPU_073'),
        ('256', 'BSCTRFM_TPU_075'),
        ('512', 'BSCTRFM_TPU_077'),
        ('1024', 'BSCTRFM_TPU_079'),
):
    belongs_to_model = metrics_dataframe['model_id'] == model_id_for_batch
    # independent copy, so later changes do not touch metrics_dataframe
    metrics[batch_size] = metrics_dataframe[belongs_to_model].copy()

In [46]:
# summary statistics of the metrics for batch size 128 (model BSCTRFM_TPU_073)
metrics['128'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.217132,0.458874
std,0.043255,0.036415
min,0.161356,0.420486
25%,0.19548,0.436263
50%,0.206718,0.451467
75%,0.224062,0.459398
max,0.300812,0.52683


In [47]:
# summary statistics of the metrics for batch size 256 (model BSCTRFM_TPU_075)
metrics['256'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.202824,0.448452
std,0.032228,0.025204
min,0.172128,0.423623
25%,0.181385,0.429159
50%,0.190816,0.439283
75%,0.220694,0.467502
max,0.268702,0.488344


In [48]:
# summary statistics of the metrics for batch size 512 (model BSCTRFM_TPU_077)
metrics['512'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.234465,0.473096
std,0.04116,0.033014
min,0.180294,0.424285
25%,0.194924,0.448137
50%,0.23902,0.469231
75%,0.264012,0.489406
max,0.302854,0.523896


In [49]:
# summary statistics of the metrics for batch size 1024 (model BSCTRFM_TPU_079)
metrics['1024'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.259432,0.513727
std,0.06898,0.073082
min,0.179291,0.442417
25%,0.203829,0.446662
50%,0.247114,0.514392
75%,0.308919,0.547281
max,0.392601,0.66813


In [50]:
# statistics dictionary: stats[<statistic>][<batch size>], every slot starts as
# None and is filled in by the quantile loop below
stats = {
    statistic: dict.fromkeys(['128', '256', '512', '1024'])
    for statistic in ['qmin', 'q1', 'q2', 'q3', 'qmax', 'iqr', 'upper', 'lower']
}

In [51]:
# batch sizes under comparison, as strings: they are the keys of `metrics` and
# `stats` and the categorical x-range of the plots
batch_sizes = ['128', '256', '512', '1024']

In [52]:
# Quartiles and 1.5*IQR fences of the error metrics for every batch size.
# Only the metric columns are selected before calling quantile(): the table
# also holds string columns (model_id, dataset, ...), and DataFrame.quantile
# raises on those when numeric_only defaults to False (pandas >= 2.0). Each
# stored value is a Series indexed by 'nd' and 'nrmse'.
metric_columns = ['nd', 'nrmse']
quantile_levels = {'qmin': 0., 'q1': 0.25, 'q2': 0.5, 'q3': 0.75, 'qmax': 1.}

for batch_size in batch_sizes:
    batch_metrics = metrics[batch_size][metric_columns]
    for statistic, level in quantile_levels.items():
        stats[statistic][batch_size] = batch_metrics.quantile(q=level)
    # interquartile range and the fences 1.5*IQR beyond the quartiles
    stats['iqr'][batch_size] = stats['q3'][batch_size] - stats['q1'][batch_size]
    stats['upper'][batch_size] = stats['q3'][batch_size] + 1.5*stats['iqr'][batch_size]
    stats['lower'][batch_size] = stats['q1'][batch_size] - 1.5*stats['iqr'][batch_size]

In [53]:
# are there outliers for ND or NRMSE (values below Q1 - 1.5*IQR or above Q3 + 1.5*IQR)?

In [54]:
# count the ND values that fall outside the lower/upper fences, per batch size
for batch_size in batch_sizes:
    nd_values = metrics[batch_size]['nd']
    below_fence = nd_values < stats['lower'][batch_size]['nd']
    above_fence = nd_values > stats['upper'][batch_size]['nd']
    num_outliers = np.sum(below_fence | above_fence)

    print('The number of outliers for ND at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for ND at batch size 128 is 2
The number of outliers for ND at batch size 256 is 0
The number of outliers for ND at batch size 512 is 0
The number of outliers for ND at batch size 1024 is 0


In [55]:
# count the NRMSE values that fall outside the lower/upper fences, per batch size
for batch_size in batch_sizes:
    nrmse_values = metrics[batch_size]['nrmse']
    below_fence = nrmse_values < stats['lower'][batch_size]['nrmse']
    above_fence = nrmse_values > stats['upper'][batch_size]['nrmse']
    num_outliers = np.sum(below_fence | above_fence)

    print('The number of outliers for NRMSE at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for NRMSE at batch size 128 is 2
The number of outliers for NRMSE at batch size 256 is 0
The number of outliers for NRMSE at batch size 512 is 0
The number of outliers for NRMSE at batch size 1024 is 0


In [56]:
# ToDo: plot outliers in boxplots?

In [62]:
# These Bokeh names are already imported near the top of the notebook; this
# cell repeats the imports and calls output_notebook() again before plotting.
# NOTE(review): its execution count (62) is out of sequence - confirm whether
# the cell is still needed on a fresh Restart & Run All.
from bokeh.plotting import figure, show
from bokeh.layouts import row
output_notebook()

In [58]:
# Bokeh figures keyed by metric name ('nd', 'nrmse')
plots = dict()

In [59]:
# Box plot of ND per batch size.
# Per-batch-size levels used by the glyphs, in batch_sizes order.
nd_levels = {
    level: [stats[level][batch_size]['nd'] for batch_size in batch_sizes]
    for level in ('lower', 'q1', 'q2', 'q3', 'upper')
}

nd_figure = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)
plots['nd'] = nd_figure

# axis labels are currently disabled:
# plots['nd'].xaxis.axis_label = "Batch Size"
# plots['nd'].yaxis.axis_label = "ND"
nd_figure.axis.axis_label_text_font_style = "bold"
nd_figure.axis.axis_label_text_font_size = "12pt"

# box: one bar from Q1 to the median (Q2), a second one from the median to Q3
for box_bottom, box_top, box_colour in (
        ('q1', 'q2', "#3B8686"),
        ('q2', 'q3', "#E08E79")):
    nd_figure.vbar(
        x=batch_sizes,
        width=0.5,
        bottom=nd_levels[box_bottom],
        top=nd_levels[box_top],
        fill_color=box_colour,
        line_color="black")

# stems: lower fence up to Q1, then Q3 up to the upper fence
# (the fences are the 1.5*IQR limits stored in stats['lower'] / stats['upper'])
for stem_start, stem_end in (('lower', 'q1'), ('q3', 'upper')):
    nd_figure.segment(
        x0=batch_sizes,
        y0=nd_levels[stem_start],
        x1=batch_sizes,
        y1=nd_levels[stem_end],
        line_color='black')

# whisker caps at both fences (almost-0 height rects simpler than segments)
for cap_level in ('lower', 'upper'):
    nd_figure.rect(
        x=batch_sizes,
        y=nd_levels[cap_level],
        width=0.2,
        height=0.00001,
        line_color="black")

In [60]:
# Box plot of NRMSE per batch size.
# Per-batch-size levels used by the glyphs, in batch_sizes order.
nrmse_levels = {
    level: [stats[level][batch_size]['nrmse'] for batch_size in batch_sizes]
    for level in ('lower', 'q1', 'q2', 'q3', 'upper')
}

nrmse_figure = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)
plots['nrmse'] = nrmse_figure

# axis labels are currently disabled:
# plots['nrmse'].yaxis.axis_label = "NRMSE"
# plots['nrmse'].xaxis.axis_label = "Batch Size"
nrmse_figure.axis.axis_label_text_font_style = "bold"
nrmse_figure.axis.axis_label_text_font_size = "12pt"

# box: one bar from Q1 to the median (Q2), a second one from the median to Q3
for box_bottom, box_top, box_colour in (
        ('q1', 'q2', "#3B8686"),
        ('q2', 'q3', "#E08E79")):
    nrmse_figure.vbar(
        x=batch_sizes,
        width=0.5,
        bottom=nrmse_levels[box_bottom],
        top=nrmse_levels[box_top],
        fill_color=box_colour,
        line_color="black")

# stems: lower fence up to Q1, then Q3 up to the upper fence
# (the fences are the 1.5*IQR limits stored in stats['lower'] / stats['upper'])
for stem_start, stem_end in (('lower', 'q1'), ('q3', 'upper')):
    nrmse_figure.segment(
        x0=batch_sizes,
        y0=nrmse_levels[stem_start],
        x1=batch_sizes,
        y1=nrmse_levels[stem_end],
        line_color='black')

# whisker caps at both fences (almost-0 height rects simpler than segments)
for cap_level in ('lower', 'upper'):
    nrmse_figure.rect(
        x=batch_sizes,
        y=nrmse_levels[cap_level],
        width=0.2,
        height=0.00001,
        line_color="black")

In [61]:
# render the ND and NRMSE box plots together in one row layout
show(row(plots['nd'], plots['nrmse']))