In [1]:
## Training model using sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

## Fixed seed: without it the split and the forest differ on every run, so the
## exported ONNX file and the sizes printed further down are not reproducible.
SEED = 42

## Load the iris dataset as a feature matrix X and a class-label vector y.
iris = load_iris()
X, y = iris.data, iris.target
## Hold out a test split (train_test_split's default proportions), seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)

## Random forest with default hyper-parameters; only the seed is pinned.
clf = RandomForestClassifier(random_state=SEED)
clf.fit(X_train, y_train)

In [2]:
## Installing skl2onnx (Library for converting scikit-learn model into ONNX)
#!pip install skl2onnx

In [3]:
## Converting scikit-learn model to ONNX format
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

## Path of the serialized ONNX model; the later cells reuse this variable.
onnx_model = "rf_iris.onnx"
## Single graph input: a float tensor with a free batch dimension and one column
## per training feature (taken from the data instead of a hard-coded 4).
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
## zipmap=False drops the final ZipMap node so probabilities come back as a matrix rather than maps.
onx = convert_sklearn(clf, initial_types=initial_type, options={'zipmap': False})

## Write the protobuf-serialized model to disk.
with open(onnx_model, "wb") as f:
    f.write(onx.SerializeToString())



In [4]:
## Installing ONNX Runtime
##
## ONNX Runtime is a cross-platform machine-learning model accelerator, with a flexible interface to integrate hardware-specific libraries.
## ONNX Runtime can be used with models from PyTorch, Tensorflow/Keras, TFLite, scikit-learn, and other frameworks.
##
## (Kept as comments: a bare triple-quoted string is an expression, so the kernel would echo it back as cell output.)
#!pip install onnxruntime

In [5]:
## Computing prediction with ONNX Runtime
import onnxruntime as rt
import numpy

## Open the model written by the conversion cell. Reusing `onnx_model` keeps the path defined in one place.
## Providers are named explicitly: since ONNX Runtime 1.9, builds that ship extra execution providers
## refuse to create a session unless `providers` is given.
sess = rt.InferenceSession(onnx_model, providers=["CPUExecutionProvider"])
## First graph input / first graph output (the predicted label) are looked up by name.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
## The graph input was declared as a float tensor, so cast the test features to float32 before running.
## sess.run returns one array per requested output; [0] picks the label array.
pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]

In [6]:
## Quantizing ONNX Models (Quantization in ONNX Runtime refers to 8 bit linear quantization of an ONNX model)
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

## Destination file for the quantized copy of the model.
onnx_quantized_model = 'rf_iris.quant.onnx'
## Dynamic quantization: read `onnx_model`, write the result to
## `onnx_quantized_model`, storing quantized weights as QInt8.
quantize_dynamic(
    model_input=onnx_model,
    model_output=onnx_quantized_model,
    weight_type=QuantType.QInt8,
)

In [7]:
## Checking the ONNX model size before and after Quantization
import os
## Report the on-disk size of each model file in KB (bytes / 1024),
## original first, then the quantized copy.
for label, path in (
    ("Size of ONNX model (KB) : ", onnx_model),
    ("Size of ONNX quantized model (KB) : ", onnx_quantized_model),
):
    print(label, os.path.getsize(path) / 1024)

Size of ONNX model (KB) :  65.6630859375
Size of ONNX quantized model (KB) :  65.873046875


In [8]:
## The model size barely changes after quantization. quantize_dynamic only converts the weight tensors of operators it supports
## (e.g. MatMul, Conv) to 8 bit; a random forest is exported as a tree-ensemble node that has no such weights, so there is nothing to shrink.


### **Note:** sklearn-onnx adds a final `ZipMap` node to every classifier. This node returns the probabilities as maps (dictionaries) instead of a matrix, so pass `options={'zipmap': False}` when converting the sklearn model to ONNX to get a plain matrix.