# Load "large" RF Model
Load the random forest (RF) model from its pickle (`.pkl`) and ONNX (`.onnx`) files.



In [1]:
import glob
import os
import numpy as np
import pandas as pd
from io import StringIO
import joblib
import time


import onnxruntime as rt

from mlprodict.onnxrt import OnnxInference
from pyquickhelper.helpgen.graphviz_helper import plot_graphviz
import matplotlib.pyplot as plt

In [2]:
# Notebook-wide configuration constant.
# NOTE(review): presumably the seed for any stochastic step; it is not referenced
# in the cells shown here -- confirm it is used further down or remove it.
RANDOM_SEED = 123

## Model loading

In [3]:
%%time
# Collect data on RF tree structure: for every pickled model found, record the
# file sizes and load times of the sklearn pickle and of the ONNX file that sits
# next to it (same path, '.onnx' extension), plus basic tree-depth statistics.
BYTES_PER_MB = 1024 * 1024

tree_metrics = []
rf_models = glob.glob('./large_rf_model.pkl')
for model in rf_models:
    # `model` is the path of the .pkl file; derive the sibling ONNX path once
    fp_parts = os.path.splitext(model)
    onnx_path = fp_parts[0] + '.onnx'

    # get file sizes
    # basename (instead of splitting on '/') also copes with OS-specific separators
    metrics = {'model': os.path.basename(fp_parts[0])}
    metrics['sklearn_file_size_mb'] = os.path.getsize(model) / BYTES_PER_MB
    metrics['onnx_file_size_mb'] = os.path.getsize(onnx_path) / BYTES_PER_MB

    # time the creation of the onnxruntime inference session
    t0 = time.perf_counter()
    sess = rt.InferenceSession(onnx_path)
    metrics['onnx_load_time_ms'] = 1000 * (time.perf_counter() - t0)
    del sess  # only the load time is needed; drop the reference right away

    # extract tree structure
    # NOTE: joblib.load is pickle-based and can execute arbitrary code while
    # loading -- only load model files that come from a trusted source.
    t0 = time.perf_counter()
    rf = joblib.load(model)
    metrics['sklearn_load_time_ms'] = 1000 * (time.perf_counter() - t0)
    metrics['number_of_trees'] = len(rf.estimators_)
    tree_depth = [tree.tree_.max_depth for tree in rf.estimators_]
    metrics['tree_min_depth'] = np.min(tree_depth)
    metrics['tree_max_depth'] = np.max(tree_depth)
    metrics['tree_mean_depth'] = np.mean(tree_depth)
    del rf  # drop the reference to the loaded model before the next iteration

    # collect metrics
    tree_metrics.append(metrics)

CPU times: user 18.2 s, sys: 2.89 s, total: 21.1 s
Wall time: 20.9 s


In [4]:
# overview of tree structure
# Passing `columns=` builds the frame directly in the desired column order and
# still yields an (empty) table with these columns when no model file was found,
# instead of raising a KeyError on a separate column-selection step.
tree_metrics_df = pd.DataFrame(
    tree_metrics,
    columns=[
        'model', 'sklearn_file_size_mb', 'onnx_file_size_mb', 'sklearn_load_time_ms', 'onnx_load_time_ms',
        'number_of_trees', 'tree_min_depth', 'tree_mean_depth', 'tree_max_depth'
    ],
)
tree_metrics_df

Unnamed: 0,model,sklearn_file_size_mb,onnx_file_size_mb,sklearn_load_time_ms,onnx_load_time_ms,number_of_trees,tree_min_depth,tree_mean_depth,tree_max_depth
0,large_rf_model,771.87106,485.680079,1013.354141,18802.051422,1000,22,24.976,32
