# Prototype Benchmark
For each record to be scored, the county-specific model is read from external storage; models are not cached in memory.

* `model_load_time` (seconds) is the time required to read a model file from external storage and deserialize the model object.
* `model_score_time` (seconds) is the time to score one record.

## Metrics reported in this notebook are from synthetic data and **have not** been calibrated to representative dataset or model sizes.

## Notebook run-time environment
* **Hardware:** MacBook Pro Intel(2019), 16GB RAM, 1TB SSD drive
* **OS:** MacOS 11.6.1
* **Docker:** Docker for Desktop 4.2.0 (Mac)
* **Docker Image:** Base image: `jupyter/datascience-notebook:lab-3.2.5` with ONNX packages added

In [1]:
import glob
import os
import shutil
import pandas as pd
import numpy as np
import onnxruntime as rt
import pickle

## Setup for tests

In [2]:
# Required to allow import of project-specific utility functions: the `utils`
# package is expected in the parent directory of the one the notebook starts in.
# Guarded so the cell is idempotent -- re-running it (or starting the kernel
# from the project root) must not climb one more directory up the tree.
if not os.path.isdir('utils'):
    os.chdir('..')

In [3]:
# import project-specific utility functions
from utils.utils import BenchmarkDriver, load_config, actualsize_mb

In [4]:
# Read the benchmark configuration (path is relative to the current working
# directory) and echo it as the cell output.
CONFIG_FILE = './config.yaml'
config = load_config(CONFIG_FILE)
config

{'data_dir': '/Users/jim/Desktop/onnx_sandbox/data',
 'models_dir': '/Users/jim/Desktop/onnx_sandbox/models',
 'number_records': 100000,
 'number_features': 20,
 'number_informative': 14,
 'fraction_for_test': 0.2,
 'number_counties': 20,
 'random_seed': 123}

In [5]:
# Input and output locations, all derived from the loaded configuration.
_data_root = config['data_dir']
_models_root = config['models_dir']

TEST_DATA = os.path.join(_data_root, 'benchmark', 'test.parquet')
PERFORMANCE_DATA_DIR = os.path.join(_data_root, 'performance')
MODELS_DIR = os.path.join(_models_root, 'benchmark')


In [6]:
# Set up an empty directory to collect performance data: remove whatever is
# already at PERFORMANCE_DATA_DIR (errors, e.g. a missing directory, are
# ignored), then create the directory again.
shutil.rmtree(PERFORMANCE_DATA_DIR, ignore_errors=True)
os.makedirs(PERFORMANCE_DATA_DIR, exist_ok=True)

## Analyze RF Tree Structure

In [7]:
# Collect size and tree-structure metrics for every pickled random forest in
# MODELS_DIR together with its ONNX counterpart (same file stem, '.onnx'
# extension). One dict per model is appended to `tree_metrics`.
tree_metrics = []

# glob returns paths in arbitrary, filesystem-dependent order; sort them so the
# resulting table is ordered by model name and reproducible across runs.
rf_models = sorted(glob.glob(os.path.join(MODELS_DIR, '*.pkl')))
if not rf_models:
    # fail here with a clear message instead of a KeyError in a later cell
    raise FileNotFoundError(f'no *.pkl model files found in {MODELS_DIR}')

for pkl_path in rf_models:
    model_stem = os.path.splitext(pkl_path)[0]
    onnx_path = model_stem + '.onnx'

    # model name and on-disk file sizes (bytes -> MiB); basename keeps the
    # name extraction independent of the platform's path separator
    metrics = {'model': os.path.basename(model_stem)}
    metrics['sklearn_file_size_mb'] = os.path.getsize(pkl_path) / (1024 * 1024)
    metrics['onnx_file_size_mb'] = os.path.getsize(onnx_path) / (1024 * 1024)

    # extract tree structure
    # NOTE: pickle.load can execute arbitrary code -- only load model files
    # that come from a trusted source.
    with open(pkl_path, 'rb') as f:
        rf = pickle.load(f)
    metrics['sklearn_in_memory_mb'] = actualsize_mb(rf)
    metrics['number_of_estimators'] = len(rf.estimators_)
    tree_depth = [tree.tree_.max_depth for tree in rf.estimators_]
    metrics['min_depth'] = np.min(tree_depth)
    metrics['max_depth'] = np.max(tree_depth)
    metrics['mean_depth'] = np.mean(tree_depth)

    # get onnx in-memory size
    # NOTE(review): in a previously recorded run the sklearn and ONNX figures
    # were almost identical and crept upward with every model -- verify what
    # actualsize_mb actually measures for an InferenceSession.
    onnx_rf = rt.InferenceSession(onnx_path)
    metrics['onnx_in_memory_mb'] = actualsize_mb(onnx_rf)

    # drop the references to both models before loading the next pair
    del rf
    del onnx_rf

    # collect metrics
    tree_metrics.append(metrics)

In [8]:
# Tabulate the collected tree-structure metrics: one row per model, one
# column per metric key.
tree_metrics_df = pd.DataFrame(data=tree_metrics)
tree_metrics_df

Unnamed: 0,model,sklearn_file_size_mb,onnx_file_size_mb,sklearn_in_memory_mb,number_of_estimators,min_depth,max_depth,mean_depth,onnx_in_memory_mb
0,cnty0000,31.296801,19.027223,54.2946,100,20,25,22.52,54.2975
1,cnty0001,30.882494,18.774771,54.2965,100,20,27,21.94,54.2981
2,cnty0002,30.513475,18.54988,54.2971,100,20,26,22.24,54.2987
3,cnty0003,31.265795,19.008325,54.2977,100,20,30,22.79,54.2993
4,cnty0004,30.445482,18.508421,54.2983,100,20,27,22.15,54.2999
5,cnty0005,31.025072,18.861608,54.2989,100,20,27,22.35,54.3005
6,cnty0006,30.592455,18.598022,54.2995,100,20,27,22.32,54.3011
7,cnty0007,30.534838,18.562901,54.3001,100,19,25,21.91,54.3017
8,cnty0008,31.118578,18.918609,54.3007,100,19,29,22.42,54.3023
9,cnty0009,30.592455,18.597977,54.3014,100,19,27,22.53,54.303


In [9]:
# Summary statistics across all models, shown with one row per metric.
tree_metrics_of_interest = [
    'sklearn_file_size_mb',
    'onnx_file_size_mb',
    'sklearn_in_memory_mb',
    'onnx_in_memory_mb',
    'number_of_estimators',
    'min_depth',
    'mean_depth',
    'max_depth',
]
tree_metrics_summary = tree_metrics_df[tree_metrics_of_interest].describe()
tree_metrics_summary.T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
sklearn_file_size_mb,20.0,30.897905,0.415385,30.369677,30.578051,30.805956,31.124498,31.940233
onnx_file_size_mb,20.0,18.784136,0.253127,18.462207,18.589208,18.728096,18.922213,19.419286
sklearn_in_memory_mb,20.0,54.302135,0.004402,54.2946,54.29875,54.302,54.30515,54.3104
onnx_in_memory_mb,20.0,54.303915,0.004453,54.2975,54.30035,54.3036,54.30675,54.312
number_of_estimators,20.0,100.0,0.0,100.0,100.0,100.0,100.0,100.0
min_depth,20.0,19.8,0.410391,19.0,20.0,20.0,20.0,20.0
mean_depth,20.0,22.3625,0.284788,21.84,22.2175,22.37,22.53,22.89
max_depth,20.0,27.0,1.65434,25.0,26.0,27.0,27.25,31.0


## Get some test data

In [10]:
# Draw a reproducible sample of 100 test records. reset_index() moves the
# original row labels into an 'index' column, which later cells use as the
# record id.
test_df = (
    pd.read_parquet(TEST_DATA)
    .sample(n=100, random_state=config['random_seed'])
    .reset_index()
)
test_df.shape

(100, 23)

In [11]:
# Peek at the identifying columns and the target of the first ten sampled records.
test_df.loc[:, ['index', 'county', 'y']].head(10)

Unnamed: 0,index,county,y
0,12136,cnty0015,-31.640778
1,16812,cnty0004,123.967995
2,10072,cnty0011,-59.197975
3,5850,cnty0010,-154.407074
4,4320,cnty0014,5.997156
5,4429,cnty0003,37.982521
6,11744,cnty0011,-19.720573
7,12106,cnty0011,271.501892
8,4230,cnty0019,140.779358
9,16858,cnty0002,45.374973


In [12]:
# Columns summarized for each benchmark run: the model's in-memory size plus
# the load, score and combined (load + score) prediction timings.
metrics_of_interest = [
    'model_memory_size_mb',
    'model_load_time',
    'model_score_time',
    'model_prediction_time',
]

## Sklearn test

In [13]:
%%time
# Score every sampled record with the sklearn model of its county. The
# per-record statistics end up in the CSV named by `performance_fp`, which the
# next cell reads back.
sklearn_driver = BenchmarkDriver(
    model_type='sklearn',
    models_dir=MODELS_DIR,
    performance_fp=os.path.join(PERFORMANCE_DATA_DIR, 'sklearn_benchmark.csv'),
    test_scenario='county-level'
)

# columns that identify or label a record and are not passed as features
non_feature_columns = ['index', 'county', 'y']

try:
    # iterate over each row and collect run-time performance statistics
    for idx, row in test_df.iterrows():
        # iterrows yields a Series; turn it back into a one-row DataFrame
        one_record = pd.DataFrame(row).T
        sklearn_driver.score_one_record(
            row['county'], row['index'],
            one_record.drop(non_feature_columns, axis='columns'))
finally:
    # Always finalize the performance data, even if scoring fails part-way, so
    # the driver's output is not left open (presumably this flushes and closes
    # the CSV -- confirm against BenchmarkDriver).
    sklearn_driver.close_performance_data()

CPU times: user 30 s, sys: 1.57 s, total: 31.6 s
Wall time: 31.5 s


In [14]:
# display collected performance metrics
sklearn_metrics_df = pd.read_csv(os.path.join(PERFORMANCE_DATA_DIR, 'sklearn_benchmark.csv'))
sklearn_metrics_df['model_prediction_time'] = sklearn_metrics_df['model_load_time'] + sklearn_metrics_df['model_score_time']
sklearn_metrics_df.head(10)

Unnamed: 0,scenario,record_id,model_type,county_id,model_memory_size_mb,model_load_time,model_score_time,prediction,model_prediction_time
0,county-level,12136,sklearn,cnty0015,54.9163,0.012479,0.024189,-116.880853,0.036668
1,county-level,16812,sklearn,cnty0004,54.9461,0.01071,0.024462,153.039992,0.035172
2,county-level,10072,sklearn,cnty0011,54.9474,0.011193,0.022618,-30.62064,0.03381
3,county-level,5850,sklearn,cnty0010,54.9489,0.010648,0.021066,-89.091659,0.031713
4,county-level,4320,sklearn,cnty0014,54.9494,0.010582,0.02181,79.284982,0.032392
5,county-level,4429,sklearn,cnty0003,54.9497,0.011952,0.021849,31.401549,0.033802
6,county-level,11744,sklearn,cnty0011,54.9508,0.010698,0.021405,-5.722466,0.032103
7,county-level,12106,sklearn,cnty0011,54.9519,0.010812,0.021186,139.301654,0.031998
8,county-level,4230,sklearn,cnty0019,54.9526,0.012683,0.021898,103.776609,0.034581
9,county-level,16858,sklearn,cnty0002,54.9531,0.01102,0.022988,72.877299,0.034007


In [15]:
# Distribution of the sklearn metrics (including the 90th percentile), shown
# with one row per metric.
sklearn_summary = sklearn_metrics_df[metrics_of_interest].describe(percentiles=[.25, .5, .75, .9])
sklearn_summary.T

Unnamed: 0,count,mean,std,min,25%,50%,75%,90%,max
model_memory_size_mb,100.0,54.973732,0.012573,54.9163,54.967675,54.9779,54.982425,54.98592,54.9887
model_load_time,100.0,0.011417,0.000927,0.010582,0.010868,0.011073,0.011472,0.01333,0.014773
model_score_time,100.0,0.021743,0.001133,0.020091,0.021119,0.021515,0.022134,0.02296,0.028102
model_prediction_time,100.0,0.033161,0.001479,0.030887,0.032151,0.032762,0.033741,0.035229,0.038927


## ONNX Test

In [16]:
%%time
# Score every sampled record with the ONNX model of its county. The
# per-record statistics end up in the CSV named by `performance_fp`, which the
# next cell reads back.
onnx_driver = BenchmarkDriver(
    model_type='onnx',
    models_dir=MODELS_DIR,
    performance_fp=os.path.join(PERFORMANCE_DATA_DIR, 'onnx_benchmark.csv'),
    test_scenario='county-level'
)

# columns that identify or label a record and are not passed as features
non_feature_columns = ['index', 'county', 'y']

try:
    # iterate over each row and collect run-time performance statistics
    for idx, row in test_df.iterrows():
        # iterrows yields a Series; turn it back into a one-row DataFrame
        one_record = pd.DataFrame(row).T
        # the ONNX path is fed a float32 numpy array rather than a DataFrame
        features = (
            one_record.drop(non_feature_columns, axis='columns')
            .astype(np.float32)
            .to_numpy()
        )
        onnx_driver.score_one_record(row['county'], row['index'], features)
finally:
    # Always finalize the performance data, even if scoring fails part-way, so
    # the driver's output is not left open (presumably this flushes and closes
    # the CSV -- confirm against BenchmarkDriver).
    onnx_driver.close_performance_data()

CPU times: user 1min 24s, sys: 1.94 s, total: 1min 26s
Wall time: 1min 21s


In [17]:
# display collected performance metrics
onnx_metrics_df = pd.read_csv(os.path.join(PERFORMANCE_DATA_DIR, 'onnx_benchmark.csv'))
onnx_metrics_df['model_prediction_time'] = onnx_metrics_df['model_load_time'] + onnx_metrics_df['model_score_time']
onnx_metrics_df.head(10)

Unnamed: 0,scenario,record_id,model_type,county_id,model_memory_size_mb,model_load_time,model_score_time,prediction,model_prediction_time
0,county-level,12136,onnx,cnty0015,55.0405,0.517916,0.000401,-116.880859,0.518318
1,county-level,16812,onnx,cnty0004,55.0413,0.467021,0.000199,153.039993,0.46722
2,county-level,10072,onnx,cnty0011,55.0415,0.473089,0.000208,-30.620642,0.473297
3,county-level,5850,onnx,cnty0010,55.0417,0.468602,0.000194,-89.091667,0.468796
4,county-level,4320,onnx,cnty0014,55.0418,0.483976,0.000204,79.284981,0.48418
5,county-level,4429,onnx,cnty0003,55.0419,0.485849,0.000218,31.401548,0.486067
6,county-level,11744,onnx,cnty0011,55.0421,0.480355,0.000203,-5.722466,0.480558
7,county-level,12106,onnx,cnty0011,55.0422,0.505961,0.000197,139.301636,0.506158
8,county-level,4230,onnx,cnty0019,55.0424,0.490614,0.000203,103.776588,0.490817
9,county-level,16858,onnx,cnty0002,55.0426,0.478643,0.000204,72.877312,0.478846


In [18]:
# Distribution of the ONNX metrics (including the 90th percentile), shown
# with one row per metric.
onnx_summary = onnx_metrics_df[metrics_of_interest].describe(percentiles=[.25, .5, .75, .9])
onnx_summary.T

Unnamed: 0,count,mean,std,min,25%,50%,75%,90%,max
model_memory_size_mb,100.0,55.051146,0.007077,55.0405,55.045475,55.05,55.05475,55.06381,55.0653
model_load_time,100.0,0.480913,0.011301,0.460671,0.473813,0.478895,0.485991,0.495124,0.519201
model_score_time,100.0,0.000218,2.7e-05,0.000185,0.000203,0.000212,0.000229,0.00024,0.000401
model_prediction_time,100.0,0.481131,0.011309,0.460909,0.474014,0.479119,0.486205,0.495325,0.519404


## Differences in sklearn vs onnx predictions

In [19]:
# Absolute difference between the sklearn and ONNX prediction for each record.
# The subtraction pairs rows by their row index, so first make sure both result
# sets list the same records in the same order; otherwise the differences would
# silently compare unrelated records.
assert sklearn_metrics_df['record_id'].equals(onnx_metrics_df['record_id']), \
    'sklearn and onnx results are not aligned on record_id'

differences = np.abs(sklearn_metrics_df['prediction'] - onnx_metrics_df['prediction'])
differences.describe()

count    1.000000e+02
mean     1.306674e-05
std      1.780947e-05
min      1.621246e-07
25%      1.949072e-06
50%      6.562620e-06
75%      1.712799e-05
max      1.052976e-04
Name: prediction, dtype: float64