In [2]:
import joblib, pickle
from s2and_ext.my_models import LightGBMWrapper

from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from skl2onnx.common.data_types import FloatTensorType
from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm

import lightgbm as lgb

### Conversion to ONNX

In [3]:
# Pull the underlying LGBMClassifier out of the project's LightGBMWrapper.
# SECURITY: joblib.load and pickle.load execute arbitrary code embedded in the
# file being read; only load artifacts that come from a trusted source.
model = joblib.load('models/lightgbm.joblib').model

with open('cached/numpy_arrays.pickle', 'rb') as f:
    X_train, y_train, X_val, y_val, X_test, y_test = pickle.load(f)

# Register the onnxmltools LightGBM converter with skl2onnx so that
# convert_sklearn can handle lgb.LGBMClassifier.
update_registered_converter(
    lgb.LGBMClassifier, 'LightGbmLGBMClassifier',
    calculate_linear_classifier_output_shapes, convert_lightgbm,
    options={'nocl': [True, False], 'zipmap': [True, False, 'columns']})

# Take the input width from the data instead of hard-coding it, so the
# exported graph stays in sync with the feature matrix if features change.
n_features = X_train.shape[1]

# Batch dimension is left dynamic (None); each row carries n_features floats.
model_onnx = convert_sklearn(
    model, 'lightgbm',
    [('input', FloatTensorType([None, n_features]))],
    target_opset=12)

# Persist the serialized ONNX graph next to the original joblib model.
with open("models/lightgbm.onnx", "wb") as f:
    f.write(model_onnx.SerializeToString())


### ONNX Inference

`sess.run` returns a list with two elements. The first element is an array containing the predicted label (0 or 1) for each input. The second element is a list of dictionaries, one per input, mapping each label to its probability. The output that must be used is the probability of label 0. The following code cell prints an example output of `sess.run`.

In [7]:
import onnxruntime as rt
import numpy as np

# Do not swallow a failed load: every statement below needs a live session,
# so hiding the error here would only resurface later as an unrelated
# NameError / AttributeError.
sess = rt.InferenceSession("models/lightgbm.onnx")

# The ONNX graph was exported with a float32 input tensor; cast once and reuse.
X_train_f32 = X_train.astype(np.float32)
pred_onx = sess.run(None, {"input": X_train_f32})

# pred_onx[1] holds one {label: probability} dict per row; keep P(label == 0)
# so it can be compared with column 0 of LightGBM's predict_proba.
y_onnx = np.asarray([pred[0] for pred in pred_onx[1]])
y_target = model.predict_proba(X_train)[:,0]

# Show the raw sess.run output for the first five rows as an example.
print(sess.run(None, {"input": X_train_f32[:5,:]}))

[array([0, 0, 0, 0, 0], dtype=int64), [{0: 0.9938622713088989, 1: 0.006137728691101074}, {0: 0.9254972338676453, 1: 0.07450276613235474}, {0: 0.9979415535926819, 1: 0.0020584464073181152}, {0: 0.9901915788650513, 1: 0.00980842113494873}, {0: 0.9524827599525452, 1: 0.047517240047454834}]]


### Testing

In [4]:
# Loose parity check: LightGBM and ONNX probabilities must agree to 3 decimals.
try:
    np.testing.assert_array_almost_equal(y_target, y_onnx, decimal=3)
    print('Test passed')
except AssertionError as err:
    # Only a genuine numerical mismatch counts as a failed test; anything else
    # (e.g. an undefined variable from out-of-order execution) must propagate
    # instead of being misreported as "Test failed".
    print('Test failed')
    print(err)

Test failed


In [5]:
# Strict parity check at 6 decimals between the LightGBM probabilities
# (y_target) and the ONNX probabilities (y_onnx). Left unguarded so that
# NumPy's mismatch report is displayed if the arrays differ.
np.testing.assert_array_almost_equal(
    y_target,
    y_onnx,
    decimal=6,
)

AssertionError: 
Arrays are not almost equal to 6 decimals

Mismatched elements: 1 / 440487 (0.000227%)
Max absolute difference: 0.01507264
Max relative difference: 0.12429958
 x: array([0.993862, 0.925497, 0.997942, ..., 0.030456, 0.988451, 0.225883])
 y: array([0.993862, 0.925497, 0.997942, ..., 0.030456, 0.988451, 0.225883])

In [8]:
# Repeat the parity check on the held-out test split.
pred_onx = sess.run(None, {"input": X_test.astype(np.float32)})

# pred_onx[1] holds one {label: probability} dict per row; keep P(label == 0)
# to line up with column 0 of LightGBM's predict_proba.
y_onnx = np.asarray([pred[0] for pred in pred_onx[1]])
y_target = model.predict_proba(X_test)[:,0]

try:
    np.testing.assert_array_almost_equal(y_target, y_onnx, decimal=6)
    print('Test passed')
except AssertionError as err:
    # Catch only the comparison failure; other errors (missing session,
    # undefined variables) should surface rather than read as "Test failed".
    print('Test failed')
    print(err)

Test passed
