We will train and convert a LGBM model with categorical features according to this Kaggle notebook: https://www.kaggle.com/ezietsman/simple-python-lightgbm-example?select=train.csv
The data is sourced from Porto Seguro's Safe Driver Prediction competition: https://www.kaggle.com/c/porto-seguro-safe-driver-prediction

In [1]:
import numpy as np
import pandas as pd
import lightgbm
from sklearn.model_selection import train_test_split

In [2]:
# Prepare training data 
train = pd.read_csv('data/lgbm_cat_train.csv')

# get the labels
y = train.target.values
train.drop(['id', 'target'], inplace=True, axis=1)

x = train.values

# Create training and validation sets
x, x_test, y, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

# Create the LightGBM data containers
categorical_features = [c for c, col in enumerate(train.columns) if 'cat' in col]
train_data = lightgbm.Dataset(x, label=y, categorical_feature=categorical_features)
test_data = lightgbm.Dataset(x_test, label=y_test)

# Train the model

parameters = {
    'application': 'binary',
    'objective': 'binary',
    'metric': 'auc',
    'is_unbalance': 'true',
    'boosting': 'gbdt',
    'num_leaves': 31,
    'feature_fraction': 0.5,
    'bagging_fraction': 0.5,
    'bagging_freq': 20,
    'learning_rate': 0.05,
    'verbose': 0
}

model = lightgbm.train(parameters,
                       train_data,
                       valid_sets=test_data,
                       num_boost_round=5000,
                       early_stopping_rounds=100)

New categorical_feature is [1, 3, 4, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


[1]	valid_0's auc: 0.584237
Training until validation scores don't improve for 100 rounds
[2]	valid_0's auc: 0.599598
[3]	valid_0's auc: 0.602762
[4]	valid_0's auc: 0.609208
[5]	valid_0's auc: 0.611371
[6]	valid_0's auc: 0.613197
[7]	valid_0's auc: 0.61389
[8]	valid_0's auc: 0.614915
[9]	valid_0's auc: 0.617082
[10]	valid_0's auc: 0.618508
[11]	valid_0's auc: 0.618926
[12]	valid_0's auc: 0.619614
[13]	valid_0's auc: 0.619914
[14]	valid_0's auc: 0.620229
[15]	valid_0's auc: 0.620847
[16]	valid_0's auc: 0.621647
[17]	valid_0's auc: 0.621727
[18]	valid_0's auc: 0.622211
[19]	valid_0's auc: 0.622473
[20]	valid_0's auc: 0.62259
[21]	valid_0's auc: 0.622761
[22]	valid_0's auc: 0.623127
[23]	valid_0's auc: 0.623294
[24]	valid_0's auc: 0.623323
[25]	valid_0's auc: 0.623199
[26]	valid_0's auc: 0.623162
[27]	valid_0's auc: 0.623089
[28]	valid_0's auc: 0.622791
[29]	valid_0's auc: 0.623458
[30]	valid_0's auc: 0.623785
[31]	valid_0's auc: 0.624168
[32]	valid_0's auc: 0.623907
[33]	valid_0's auc: 0

In [3]:
# Convert to ONNX
from IVaps import convert_to_onnx

x_dummy = x[0]
save_path = "models/lgbm_cat.onnx"
convert_to_onnx(model, x_dummy, save_path, "lightgbm", target_opset=12)

RuntimeError: Operator LgbmClassifier (type: LgbmClassifier) got an input c_inputs with a wrong type <class 'onnxconverter_common.data_types.DoubleTensorType'>. Only [<class 'onnxconverter_common.data_types.FloatTensorType'>, <class 'onnxconverter_common.data_types.Int64TensorType'>] are allowed