## consolidate predictions detail and get global metrics

In [1]:
import os
import json
import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
from bokeh.layouts import row, gridplot, layout
from bokeh.palettes import d3
from bokeh.models import Span

output_notebook()

In [3]:
# pd.set_option('display.max_rows', 200)

In [4]:
PROJECT_ROOT = '/home/developer/gcp/cbidmltsf'

In [5]:
data_folder = '{}/datasets/traffic/PEMS-SF'.format(PROJECT_ROOT)
data_folder

'/home/developer/gcp/cbidmltsf/datasets/traffic/PEMS-SF'

In [6]:
# build the station_ids list

In [7]:
def process_list(s, variable_type=int, delimiter=None):
    """Parses a line in the PEMS format to a list."""
    if delimiter is None:
      l = [
          variable_type(i) for i in s.replace('[', '').replace(']', '').split()
      ]
    else:
      l = [
          variable_type(i)
          for i in s.replace('[', '').replace(']', '').split(delimiter)
      ]

    return l

In [8]:
def read_single_list(filename):
    """Returns single list from a file in the PEMS-custom format."""
    with open(os.path.join(data_folder, filename), 'r') as dat:
        l = process_list(dat.readlines()[0])
    return l

In [9]:
station_ids = [id for id in read_single_list('stations_list')]
len(station_ids)

963

In [10]:
sldb_id = 'PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'
sldb_id

'PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [11]:
forecast_window = 24
inference = '{:03d}'.format(forecast_window)

In [12]:
# build a path to the SLDB json file
data_dir = '{}/{}/{}'.format(PROJECT_ROOT, 'sldbs', sldb_id)
data_dir

'/home/developer/gcp/cbidmltsf/sldbs/PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX'

In [13]:
# then get the ts_identifier from the json file in the sldb directory
sldb_json_file = '{}/sldb.json'.format(data_dir)

In [14]:
# open the json file
with open(sldb_json_file, 'r') as inputfile:
    sldb_dict = json.load(inputfile)

In [15]:
ts_identifier = sldb_dict['ts']
m = sldb_dict['embedding']['hourly']
t = sldb_dict['no_targets']

In [16]:
ts_identifier, m, t

('PEMS-SF_SEPARATED_FULL', 168, 168)

## load all inference pickle files, then group results per saved model

In [17]:
pickle_path = '{}/inferences/traffic'.format(PROJECT_ROOT)

In [18]:
all_pickle_files = os.listdir(pickle_path)

In [19]:
all_pickle_files.sort()

In [20]:
len(all_pickle_files)

60

In [21]:
# pickle files to be excluded from metric analysis
excluded_pickle_files = [
    'BSCTRFM_TPU_075_02_1643728821_test_024_01.pkl',
    'BSCTRFM_TPU_075_03_1643729170_test_024_01.pkl',
    'BSCTRFM_TPU_075_06_1643730222_test_024_01.pkl',
    'BSCTRFM_TPU_080_00_1643649819_test_024_00.pkl',
    'BSCTRFM_TPU_080_01_1643650236_test_024_00.pkl',
    'BSCTRFM_TPU_080_02_1643650752_test_024_00.pkl',
    'BSCTRFM_TPU_080_04_1643651581_test_024_00.pkl',
    'BSCTRFM_TPU_080_05_1643651979_test_024_00.pkl',
    'BSCTRFM_TPU_080_06_1643652384_test_024_00.pkl',
    'BSCTRFM_TPU_080_07_1643652795_test_024_00.pkl',
    'BSCTRFM_TPU_081_00_1643903671_test_024_01.pkl',
    'BSCTRFM_TPU_081_01_1643903999_test_024_01.pkl',
    'BSCTRFM_TPU_081_02_1643904462_test_024_01.pkl',
    'BSCTRFM_TPU_081_03_1643904942_test_024_01.pkl',
    'BSCTRFM_TPU_081_04_1643905250_test_024_01.pkl',
    'BSCTRFM_TPU_081_05_1643905561_test_024_01.pkl',
    'BSCTRFM_TPU_081_06_1643905866_test_024_01.pkl',
    'BSCTRFM_TPU_081_07_1643906186_test_024_01.pkl',
    'BSCTRFM_TPU_081_08_1643906493_test_024_01.pkl',
    'BSCTRFM_TPU_081_09_1643906797_test_024_01.pkl',
]

In [22]:
len(excluded_pickle_files)

20

In [23]:
# use only selected pickle_files
pickle_files = [file for file in all_pickle_files if file not in excluded_pickle_files]

In [24]:
len(pickle_files)

40

In [25]:
metrics_dataframe_columns = [
    'model_id',
    'execution',
    'saved_model_id',
    'dataset',
    'inference',
    'event',
    'nd',
    'nrmse'
]

In [26]:
metrics_dataframe = pd.DataFrame(columns=metrics_dataframe_columns)

for pickle_file in pickle_files:
       
    inferences_df = pd.read_pickle('{}/{}'.format(pickle_path, pickle_file))

    # buffer_df = pd.DataFrame(columns=metrics_dataframe_columns)
    
    model_id = pickle_file[:15]
    execution = int(pickle_file[16:18])
    saved_model_id = int(pickle_file[19:29])
    dataset = pickle_file[30:34]
    inference = pickle_file[35:38]
    event = int(pickle_file[39:41])     
    
    mae = mean_absolute_error(inferences_df['prediction'], inferences_df['target'])
    true_values_average = np.mean(inferences_df['target'])
    nd = mae/true_values_average
    rmse = sqrt(mean_squared_error(inferences_df['prediction'], inferences_df['target']))
    nrmse = rmse/true_values_average
    
    row_dict = {
        'model_id': model_id,
        'execution': execution,
        'saved_model_id': saved_model_id,
        'dataset': dataset,
        'inference': inference,
        'event': event,
        'nd': nd,
        'nrmse': nrmse
    }
    
    metrics_dataframe = metrics_dataframe.append(row_dict, ignore_index = True)

In [27]:
metrics_dataframe

Unnamed: 0,model_id,execution,saved_model_id,dataset,inference,event,nd,nrmse
0,BSCTRFM_TPU_073,0,1649257741,test,24,1,0.17451,0.424853
1,BSCTRFM_TPU_073,1,1649344388,test,24,1,0.171399,0.4256
2,BSCTRFM_TPU_073,2,1649344771,test,24,1,0.163412,0.40577
3,BSCTRFM_TPU_073,3,1649345163,test,24,1,0.168061,0.424414
4,BSCTRFM_TPU_073,4,1649345552,test,24,1,0.217077,0.460627
5,BSCTRFM_TPU_073,5,1649345937,test,24,1,0.153402,0.414934
6,BSCTRFM_TPU_073,6,1649346340,test,24,1,0.178945,0.42162
7,BSCTRFM_TPU_073,7,1649346725,test,24,1,0.159544,0.420988
8,BSCTRFM_TPU_073,8,1649347143,test,24,1,0.199419,0.458836
9,BSCTRFM_TPU_073,9,1649347597,test,24,1,0.143812,0.406606


In [28]:
metrics = dict()

# 128 at float32
batch_size = '128'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_073'].copy()

# 256 at float32
batch_size = '256'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_075'].copy()

# 512 at float32
batch_size = '512'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_077'].copy()

# 1024 at float32
batch_size = '1024'
metrics[batch_size] = metrics_dataframe[metrics_dataframe['model_id'] == 'BSCTRFM_TPU_079'].copy()

In [29]:
metrics['128'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.172958,0.426425
std,0.021639,0.018949
min,0.143812,0.40577
25%,0.160511,0.416448
50%,0.16973,0.423017
75%,0.177836,0.425413
max,0.217077,0.460627


In [30]:
metrics['256'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.166745,0.422521
std,0.019281,0.014996
min,0.141908,0.406144
25%,0.154436,0.413914
50%,0.160642,0.418189
75%,0.177683,0.428087
max,0.198078,0.455805


In [31]:
metrics['512'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.180517,0.432615
std,0.019792,0.014773
min,0.153254,0.408334
25%,0.162231,0.419813
50%,0.180424,0.435648
75%,0.196844,0.444179
max,0.2091,0.451007


In [32]:
metrics['1024'].describe()

Unnamed: 0,nd,nrmse
count,10.0,10.0
mean,0.185072,0.442491
std,0.029071,0.028999
min,0.155108,0.413221
25%,0.16281,0.424208
50%,0.176902,0.434226
75%,0.194567,0.451553
max,0.243202,0.501385


In [42]:
# initialize the statistics dictionary
stats = {
    'qmin': {'128': None, '256': None, '512': None, '1024': None},
    'q1': {'128': None, '256': None, '512': None, '1024': None},
    'q2': {'128': None, '256': None, '512': None, '1024': None},
    'q3': {'128': None, '256': None, '512': None, '1024': None},
    'qmax': {'128': None, '256': None, '512': None, '1024': None},
    'iqr': {'128': None, '256': None, '512': None, '1024': None},
    'upper': {'128': None, '256': None, '512': None, '1024': None},
    'lower': {'128': None, '256': None, '512': None, '1024': None},
}

In [43]:
batch_sizes = ['128', '256', '512', '1024']

In [44]:
for batch_size in batch_sizes:
    stats['qmin'][batch_size] = metrics[batch_size].quantile(q=0.)
    stats['q1'][batch_size] = metrics[batch_size].quantile(q=0.25)
    stats['q2'][batch_size] = metrics[batch_size].quantile(q=0.5)
    stats['q3'][batch_size] = metrics[batch_size].quantile(q=0.75)
    stats['qmax'][batch_size] = metrics[batch_size].quantile(q=1.)
    stats['iqr'][batch_size] = stats['q3'][batch_size] - stats['q1'][batch_size]
    stats['upper'][batch_size] = stats['q3'][batch_size] + 1.5*stats['iqr'][batch_size]
    stats['lower'][batch_size] = stats['q1'][batch_size] - 1.5*stats['iqr'][batch_size]

In [45]:
# are there outliers for ND or NRMSE (values outside the inter-quantile range)?

In [46]:
for batch_size in batch_sizes:
    num_outliers = np.sum((metrics[batch_size]['nd'] < stats['lower'][batch_size]['nd']) \
    | \
    (metrics[batch_size]['nd'] > stats['upper'][batch_size]['nd']))

    print('The number of outliers for ND at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for ND at batch size 128 is 1
The number of outliers for ND at batch size 256 is 0
The number of outliers for ND at batch size 512 is 0
The number of outliers for ND at batch size 1024 is 1


In [47]:
for batch_size in batch_sizes:
    num_outliers = np.sum((metrics[batch_size]['nrmse'] < stats['lower'][batch_size]['nrmse']) \
    | \
    (metrics[batch_size]['nrmse'] > stats['upper'][batch_size]['nrmse']))

    print('The number of outliers for NRMSE at batch size {} is {}'.format(batch_size, num_outliers))

The number of outliers for NRMSE at batch size 128 is 2
The number of outliers for NRMSE at batch size 256 is 1
The number of outliers for NRMSE at batch size 512 is 0
The number of outliers for NRMSE at batch size 1024 is 1


In [None]:
# ToDo: plot outliers in boxplots?

In [48]:
from bokeh.plotting import figure, show
from bokeh.layouts import row
output_notebook()

In [49]:
plots = dict()

In [50]:
plots['nd'] = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)

# plots['nd'].xaxis.axis_label = "Batch Size"
# plots['nd'].yaxis.axis_label = "ND"
plots['nd'].axis.axis_label_text_font_style = "bold"
plots['nd'].axis.axis_label_text_font_size = "12pt"

# vertical bars for quantile description
# from Q1 to mean
plots['nd'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q1'][batch_size]['nd'] for batch_size in batch_sizes],
    top=[stats['q2'][batch_size]['nd'] for batch_size in batch_sizes],
    fill_color="#3B8686",
    line_color="black")
# from mean to Q3
plots['nd'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q2'][batch_size]['nd'] for batch_size in batch_sizes],
    top=[stats['q3'][batch_size]['nd'] for batch_size in batch_sizes],
    fill_color="#E08E79",
    line_color="black")

# segments for inter-quantile description
# lower
plots['nd'].segment(
    x0=batch_sizes,
    y0=[stats['lower'][batch_size]['nd'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['q1'][batch_size]['nd'] for batch_size in batch_sizes],
    line_color='black')
# higher
plots['nd'].segment(
    x0=batch_sizes,
    y0=[stats['q3'][batch_size]['nd'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['upper'][batch_size]['nd'] for batch_size in batch_sizes],
    line_color='black')

# whiskers (almost-0 height rects simpler than segments)
# lower
plots['nd'].rect(
    x=batch_sizes,
    y=[stats['lower'][batch_size]['nd'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")
# higher
plots['nd'].rect(
    x=batch_sizes,
    y=[stats['upper'][batch_size]['nd'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")

In [51]:
plots['nrmse'] = figure(
    width=460,
    height=460,
    x_range=batch_sizes,
    # background_fill_color="#efefef",
    toolbar_location=None)

# plots['nrmse'].yaxis.axis_label = "NRMSE"
# plots['nrmse'].xaxis.axis_label = "Batch Size"
plots['nrmse'].axis.axis_label_text_font_style = "bold"
plots['nrmse'].axis.axis_label_text_font_size = "12pt"

# vertical bars for quantile description
# from Q1 to mean
plots['nrmse'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q1'][batch_size]['nrmse'] for batch_size in batch_sizes],
    top=[stats['q2'][batch_size]['nrmse'] for batch_size in batch_sizes],
    fill_color="#3B8686",
    line_color="black")
# from mean to Q3
plots['nrmse'].vbar(
    x=batch_sizes,
    width=0.5,
    bottom=[stats['q2'][batch_size]['nrmse'] for batch_size in batch_sizes],
    top=[stats['q3'][batch_size]['nrmse'] for batch_size in batch_sizes],
    fill_color="#E08E79",
    line_color="black")

# segments for inter-quantile description
# lower
plots['nrmse'].segment(
    x0=batch_sizes,
    y0=[stats['lower'][batch_size]['nrmse'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['q1'][batch_size]['nrmse'] for batch_size in batch_sizes],
    line_color='black')
# higher
plots['nrmse'].segment(
    x0=batch_sizes,
    y0=[stats['q3'][batch_size]['nrmse'] for batch_size in batch_sizes],
    x1=batch_sizes,
    y1=[stats['upper'][batch_size]['nrmse'] for batch_size in batch_sizes],
    line_color='black')

# whiskers (almost-0 height rects simpler than segments)
# lower
plots['nrmse'].rect(
    x=batch_sizes,
    y=[stats['lower'][batch_size]['nrmse'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")
# higher
plots['nrmse'].rect(
    x=batch_sizes,
    y=[stats['upper'][batch_size]['nrmse'] for batch_size in batch_sizes],
    width=0.2,
    height=0.00001,
    line_color="black")

In [52]:
show(row(plots['nd'], plots['nrmse']))