# Prototype Benchmark
For each record to be scored, the county-specific model is read from external storage.  Models are not cached in memory.

* `model_load_time` (seconds) is the time required to read a model file from external storage and deserialize the model object.
* `model_score_time` (seconds) is the time to score one record.

## Metrics reported in this notebook are from synthetic data and **have not** been calibrated to representative dataset or model sizes.

## Notebook run-time environment
* **Hardware:** MacBook Pro Intel(2019), 16GB RAM, 1TB SSD drive
* **OS:** MacOS 11.6.1
* **Docker:** Docker for Desktop 4.2.0 (Mac)
* **Docker Image:** Base image: `jupyter/datascience-notebook:lab-3.2.5` with ONNX packages added

In [1]:
import glob
import os
import shutil
import pandas as pd
import numpy as np
import onnxruntime as rt
import pickle

## Change to project root directory

In [2]:
# required to allow for import of project specific utility functions
# Move up to the project root only when we are not already there, so that
# re-running this cell does not climb one directory further each time.
# config.yaml (loaded later via './config.yaml') serves as the root marker.
if not os.path.isfile('config.yaml'):
    os.chdir('..')

In [3]:
# import project specific utility functions
from utils.utils import BenchmarkDriver, load_config, actualsize_mb

In [4]:
# get configuration parameters
# './config.yaml' is a relative path, so this cell depends on the working
# directory set by the os.chdir cell above.
config = load_config('./config.yaml')
# bare expression: display the loaded configuration as the cell output
config

{'data_dir': '/Users/jim/Desktop/onnx_sandbox/data',
 'models_dir': '/Users/jim/Desktop/onnx_sandbox/models',
 'number_records': 100000,
 'number_features': 20,
 'number_informative': 14,
 'fraction_for_test': 0.2,
 'number_counties': 20,
 'random_seed': 123}

In [5]:
# Locations derived from the configuration: the benchmark test set, the folder
# that receives the timing CSVs, and the folder holding the benchmark models.
data_root = config['data_dir']
models_root = config['models_dir']

TEST_DATA = os.path.join(data_root, 'benchmark', 'test.parquet')
PERFORMANCE_DATA_DIR = os.path.join(data_root, 'performance')
MODELS_DIR = os.path.join(models_root, 'benchmark')


In [6]:
# Set up an empty directory to collect performance data.
# WARNING: results from a previous run are deleted first. ignore_errors=True
# suppresses removal errors (e.g. the directory does not exist yet).
shutil.rmtree(PERFORMANCE_DATA_DIR, ignore_errors=True)
# recreate the (now empty) directory; exist_ok guards against a failed removal
os.makedirs(PERFORMANCE_DATA_DIR, exist_ok=True)

## Analyze RF Tree Structure

In [7]:
# collect metrics on RF tree structure
# For each pickled sklearn model in MODELS_DIR, record the on-disk size of the
# pickle and of its same-named .onnx file, the sizes reported by actualsize_mb
# for both loaded models, and depth statistics of the individual trees.
bytes_per_mb = 1024 * 1024
tree_metrics = []
# glob returns matches in arbitrary order; sort so the table is reproducible
rf_models = sorted(glob.glob(os.path.join(MODELS_DIR, '*.pkl')))
for model in rf_models:
    # get file sizes
    fp_stem = os.path.splitext(model)[0]
    onnx_fp = fp_stem + '.onnx'  # the ONNX file is expected next to the pickle
    # os.path.basename honours the platform's path separator (split('/') does not)
    metrics = {'model': os.path.basename(fp_stem)}
    metrics['sklearn_file_size_mb'] = os.path.getsize(model) / bytes_per_mb
    metrics['onnx_file_size_mb'] = os.path.getsize(onnx_fp) / bytes_per_mb

    # extract tree structure
    # SECURITY: pickle.load can execute arbitrary code; only load trusted model files
    with open(model, 'rb') as f:
        rf = pickle.load(f)
    metrics['sklearn_in_memory_mb'] = actualsize_mb(rf)
    metrics['number_of_estimators'] = len(rf.estimators_)
    tree_depth = [tree.tree_.max_depth for tree in rf.estimators_]
    metrics['min_depth'] = np.min(tree_depth)
    metrics['max_depth'] = np.max(tree_depth)
    metrics['mean_depth'] = np.mean(tree_depth)

    # get onnx in memory size
    # NOTE(review): the recorded sklearn/onnx in-memory values are nearly equal and
    # creep upward from model to model; confirm actualsize_mb measures only the
    # object passed in (an InferenceSession may keep its data outside Python objects).
    onnx_rf = rt.InferenceSession(onnx_fp)
    metrics['onnx_in_memory_mb'] = actualsize_mb(onnx_rf)

    # release both models before loading the next pair
    del rf
    del onnx_rf

    # collect metrics
    tree_metrics.append(metrics)

In [8]:
# overview of tree structure
tree_metrics_df = pd.DataFrame(tree_metrics)
tree_metrics_df

Unnamed: 0,model,sklearn_file_size_mb,onnx_file_size_mb,sklearn_in_memory_mb,number_of_estimators,min_depth,max_depth,mean_depth,onnx_in_memory_mb
0,cnty0000,31.296801,19.027223,54.2936,100,20,25,22.52,54.2964
1,cnty0001,30.882494,18.774771,54.2955,100,20,27,21.94,54.2971
2,cnty0002,30.513475,18.54988,54.2961,100,20,26,22.24,54.2977
3,cnty0003,31.265795,19.008325,54.2967,100,20,30,22.79,54.2983
4,cnty0004,30.445482,18.508421,54.2973,100,20,27,22.15,54.2989
5,cnty0005,31.025072,18.861608,54.2979,100,20,27,22.35,54.2995
6,cnty0006,30.592455,18.598022,54.2985,100,20,27,22.32,54.3001
7,cnty0007,30.534838,18.562901,54.2991,100,19,25,21.91,54.3007
8,cnty0008,31.118578,18.918609,54.2997,100,19,29,22.42,54.3013
9,cnty0009,30.592455,18.597977,54.3003,100,19,27,22.53,54.3019


In [9]:
# summary statistics across all models, one row per metric
tree_metrics_of_interest = [
    'sklearn_file_size_mb',
    'onnx_file_size_mb',
    'sklearn_in_memory_mb',
    'onnx_in_memory_mb',
    'number_of_estimators',
    'min_depth',
    'mean_depth',
    'max_depth',
]
tree_metrics_df[tree_metrics_of_interest].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
sklearn_file_size_mb,20.0,30.897905,0.415385,30.369677,30.578051,30.805956,31.124498,31.940233
onnx_file_size_mb,20.0,18.784136,0.253127,18.462207,18.589208,18.728096,18.922213,19.419286
sklearn_in_memory_mb,20.0,54.301115,0.004378,54.2936,54.29775,54.30095,54.30415,54.3093
onnx_in_memory_mb,20.0,54.30289,0.004436,54.2964,54.29935,54.30255,54.30575,54.3109
number_of_estimators,20.0,100.0,0.0,100.0,100.0,100.0,100.0,100.0
min_depth,20.0,19.8,0.410391,19.0,20.0,20.0,20.0,20.0
mean_depth,20.0,22.3625,0.284788,21.84,22.2175,22.37,22.53,22.89
max_depth,20.0,27.0,1.65434,25.0,26.0,27.0,27.25,31.0


## Get some test data

In [10]:
# Draw a reproducible 100-record sample from the benchmark test set.
# reset_index() keeps the original row label as an 'index' column, which the
# scoring cells later pass along as the record id.
test_df = (
    pd.read_parquet(TEST_DATA)
    .sample(n=100, random_state=config['random_seed'])
    .reset_index()
)
test_df.shape

(100, 23)

In [11]:
# preview the first 10 sampled records: 'index' is the row label carried over by
# reset_index, 'county' is later passed to score_one_record, and 'y' is
# presumably the target value (confirm against the data-generation code)
test_df[['index','county', 'y']].head(10)

Unnamed: 0,index,county,y
0,12136,cnty0015,-31.640778
1,16812,cnty0004,123.967995
2,10072,cnty0011,-59.197975
3,5850,cnty0010,-154.407074
4,4320,cnty0014,5.997156
5,4429,cnty0003,37.982521
6,11744,cnty0011,-19.720573
7,12106,cnty0011,271.501892
8,4230,cnty0019,140.779358
9,16858,cnty0002,45.374973


In [12]:
# columns summarised for each benchmark run; 'model_prediction_time' is not in
# the performance CSV but is derived after loading (load time + score time)
metrics_of_interest = [
    'model_memory_size_mb',
    'model_load_time',
    'model_score_time',
    'model_prediction_time',
]

## Sklearn test

In [13]:
%%time
sklearn_driver = BenchmarkDriver(
    model_type='sklearn',
    models_dir=MODELS_DIR,
    performance_fp=os.path.join(PERFORMANCE_DATA_DIR, 'sklearn_benchmark.csv'),
    test_scenario='county-level'
)

# iterate over each row and collect run-time performance statistics
for idx, row in test_df.iterrows():
    one_record = pd.DataFrame(row).T
    sklearn_driver.score_one_record(row['county'], row['index'],
            one_record.drop(['index', 'county', 'y'], axis='columns'))
    
sklearn_driver.close_performance_data()

CPU times: user 34.1 s, sys: 1.23 s, total: 35.3 s
Wall time: 35.1 s


In [14]:
# display collected performance metrics
sklearn_metrics_df = pd.read_csv(os.path.join(PERFORMANCE_DATA_DIR, 'sklearn_benchmark.csv'))
sklearn_metrics_df['model_prediction_time'] = sklearn_metrics_df['model_load_time'] + sklearn_metrics_df['model_score_time']
sklearn_metrics_df.head(10)

Unnamed: 0,scenario,record_id,model_type,county_id,model_memory_size_mb,model_load_time,model_score_time,prediction,model_prediction_time
0,county-level,12136,sklearn,cnty0015,54.9157,0.013378,0.030499,-116.880853,0.043878
1,county-level,16812,sklearn,cnty0004,54.9457,0.013398,0.027972,153.039992,0.041371
2,county-level,10072,sklearn,cnty0011,54.9469,0.012978,0.02539,-30.62064,0.038368
3,county-level,5850,sklearn,cnty0010,54.9484,0.011517,0.026968,-89.091659,0.038485
4,county-level,4320,sklearn,cnty0014,54.9489,0.012079,0.025177,79.284982,0.037256
5,county-level,4429,sklearn,cnty0003,54.9492,0.012126,0.02643,31.401549,0.038556
6,county-level,11744,sklearn,cnty0011,54.9503,0.010962,0.027359,-5.722466,0.038322
7,county-level,12106,sklearn,cnty0011,54.9514,0.012482,0.025612,139.301654,0.038094
8,county-level,4230,sklearn,cnty0019,54.952,0.012992,0.02633,103.776609,0.039323
9,county-level,16858,sklearn,cnty0002,54.9525,0.013297,0.025806,72.877299,0.039103


In [15]:
# distribution of the sklearn size/timing metrics (times in seconds), one row per metric
sklearn_metrics_df[metrics_of_interest].describe(percentiles=[0.25, 0.5, 0.75, 0.9]).transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,90%,max
model_memory_size_mb,100.0,54.973219,0.012577,54.9157,54.9672,54.9774,54.982025,54.98541,54.9882
model_load_time,100.0,0.011716,0.001022,0.010851,0.011187,0.011442,0.011942,0.012493,0.019412
model_score_time,100.0,0.024762,0.00151,0.021973,0.023816,0.024347,0.025516,0.026541,0.030499
model_prediction_time,100.0,0.036478,0.001981,0.033681,0.035191,0.0359,0.037264,0.038798,0.045084


## ONNX Test

In [16]:
%%time
onnx_driver = BenchmarkDriver(
    model_type='onnx',
    models_dir=MODELS_DIR,
    performance_fp=os.path.join(PERFORMANCE_DATA_DIR, 'onnx_benchmark.csv'),
    test_scenario='county-level'
)

# iterate over each row and collect run-time performance statistics
for idx, row in test_df.iterrows():
    one_record = pd.DataFrame(row).T
    onnx_driver.score_one_record(row['county'], row['index'],
            one_record.drop(['index', 'county', 'y'], axis='columns').astype(np.float32).to_numpy())
    
onnx_driver.close_performance_data()

CPU times: user 1min 30s, sys: 1.62 s, total: 1min 32s
Wall time: 1min 26s


In [17]:
# display collected performance metrics
onnx_metrics_df = pd.read_csv(os.path.join(PERFORMANCE_DATA_DIR, 'onnx_benchmark.csv'))
onnx_metrics_df['model_prediction_time'] = onnx_metrics_df['model_load_time'] + onnx_metrics_df['model_score_time']
onnx_metrics_df.head(10)

Unnamed: 0,scenario,record_id,model_type,county_id,model_memory_size_mb,model_load_time,model_score_time,prediction,model_prediction_time
0,county-level,12136,onnx,cnty0015,55.0394,0.527848,0.000273,-116.880859,0.52812
1,county-level,16812,onnx,cnty0004,55.0403,0.505866,0.000229,153.039993,0.506095
2,county-level,10072,onnx,cnty0011,55.0404,0.500752,0.000232,-30.620642,0.500984
3,county-level,5850,onnx,cnty0010,55.0406,0.509707,0.000227,-89.091667,0.509933
4,county-level,4320,onnx,cnty0014,55.0408,0.493217,0.000236,79.284981,0.493452
5,county-level,4429,onnx,cnty0003,55.0409,0.502158,0.000234,31.401548,0.502392
6,county-level,11744,onnx,cnty0011,55.0411,0.506335,0.000232,-5.722466,0.506567
7,county-level,12106,onnx,cnty0011,55.0412,0.50253,0.000223,139.301636,0.502753
8,county-level,4230,onnx,cnty0019,55.0414,0.518826,0.000255,103.776588,0.519081
9,county-level,16858,onnx,cnty0002,55.0415,0.489454,0.000238,72.877312,0.489692


In [18]:
# distribution of the ONNX size/timing metrics (times in seconds), one row per metric
onnx_metrics_df[metrics_of_interest].describe(percentiles=[0.25, 0.5, 0.75, 0.9]).transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,90%,max
model_memory_size_mb,100.0,55.0502,0.007198,55.0394,55.04445,55.04895,55.053725,55.06291,55.0644
model_load_time,100.0,0.502199,0.015319,0.480112,0.494498,0.498623,0.505983,0.516333,0.586258
model_score_time,100.0,0.000246,2e-05,0.000211,0.000233,0.000243,0.000255,0.000272,0.000328
model_prediction_time,100.0,0.502444,0.015323,0.480366,0.494762,0.498868,0.506213,0.516568,0.58651


## Differences in sklearn vs onnx predictions

In [19]:
# Absolute gap between the sklearn and ONNX predictions, compared row by row.
# Both CSVs were written while iterating test_df in the same order, so rows
# with the same position refer to the same record.
prediction_gap = sklearn_metrics_df['prediction'] - onnx_metrics_df['prediction']
differences = prediction_gap.abs()
differences.describe()

count    1.000000e+02
mean     1.306674e-05
std      1.780947e-05
min      1.621246e-07
25%      1.949072e-06
50%      6.562620e-06
75%      1.712799e-05
max      1.052976e-04
Name: prediction, dtype: float64