### This notebook shows how to convert a scikit-learn model to ONNX and checks that the converted model makes the same predictions.

# Import the dependencies and prepare the data

In [1]:
import numpy
from pandas import DataFrame
from tqdm import tqdm
from sklearn import config_context
from sklearn.datasets import make_regression
from sklearn.ensemble import (
    GradientBoostingRegressor, RandomForestRegressor,
    VotingRegressor)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from onnxruntime import InferenceSession
from skl2onnx import to_onnx


# Fixed seed so the synthetic data and the train/test split are identical
# on every run of the notebook.
SEED = 0
N = 11000
X, y = make_regression(n_samples=N, n_features=10, random_state=SEED)
# Only 1% of the samples go to training; the remaining 99% form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.01, random_state=SEED)
print("Train shape", X_train.shape)
print("Test shape", X_test.shape)



Train shape (110, 10)
Test shape (10890, 10)


# Create an (overly complicated) machine learning model and make predictions on the test set

In [2]:
# Base regressors combined by the voting ensemble
# (the two tree-based ones use a fixed random_state).
reg1, reg2, reg3 = (
    GradientBoostingRegressor(random_state=1),
    RandomForestRegressor(random_state=1),
    LinearRegression(),
)
named_estimators = [('gb', reg1), ('rf', reg2), ('lr', reg3)]
model = VotingRegressor(estimators=named_estimators)
# Fit on the training split; the result of fit() is shown as the cell output.
model.fit(X_train, y_train)

VotingRegressor(estimators=[('gb', GradientBoostingRegressor(random_state=1)),
                            ('rf', RandomForestRegressor(random_state=1)),
                            ('lr', LinearRegression())])

In [3]:
# Reference predictions of the scikit-learn model on the test split.
pred = model.predict(X=X_test)
pred

array([ 361.39147751,    0.53920232,    1.46728941, ...,   83.91634289,
        252.5048443 , -179.44221213])

# Convert the model to ONNX and use the ONNX model to make the same predictions

In [4]:
# One float32 training row is handed to the converter as the sample input.
sample_input = X_train[:1].astype(numpy.float32)
onx_model = to_onnx(model, sample_input)



In [5]:
# Run the converted model with onnxruntime. `InferenceSession` and `numpy`
# are already imported in the first cell, so nothing is re-imported here.
sess = InferenceSession(onx_model.SerializeToString())
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
# The model was converted from a float32 sample, so feed float32 inputs.
onnx_inputs = {input_name: X_test.astype(numpy.float32)}
# A single output was requested; take it from the returned list.
pred_onx = sess.run([label_name], onnx_inputs)[0]
pred_onx

array([[ 361.39148  ],
       [   0.5392027],
       [   1.467289 ],
       ...,
       [  83.91634  ],
       [ 252.50485  ],
       [-179.4422   ]], dtype=float32)

# Check that the predictions are equal within a small tolerance

In [6]:
# The ONNX output has a trailing length-1 axis; take its single column so
# both arrays are 1-D, then compare with an absolute tolerance of 1e-4.
onnx_column = pred_onx[:, 0]
numpy.allclose(pred, onnx_column, atol=1e-4)

True