# OMLT for Gradient Boosted Trees

Start by loading the required libraries.

In [2]:
import random
import tempfile
import numpy as np
import lightgbm as lgb
import pyomo.environ as pe
from onnxmltools.convert.lightgbm.convert import convert
from skl2onnx.common.data_types import FloatTensorType

from helpers import generate_gbt_data

# Seed both Python's and NumPy's global RNGs so the generated data is
# reproducible regardless of which generator the data helper draws from.
random.seed(100)
np.random.seed(100)

Generate random data.

In [3]:
# generate_gbt_data() comes from the local helpers module. `data` is indexed
# with 'X' and 'y' when training below, and `categorical_var_index` is passed
# to LightGBM as the categorical feature indices.
data, categorical_var_index = generate_gbt_data()

In [4]:
# Display the indices of the features that will be treated as categorical.
categorical_var_index

[3]

In [5]:
# LightGBM training parameters, kept constant for the whole notebook.
FIXED_PARAMS = dict(
    # Learning task and evaluation metric.
    objective='regression',
    metric='rmse',
    boosting='gbdt',
    # Ensemble size and tree shape: few, shallow trees.
    num_boost_round=20,
    max_depth=3,
    min_data_in_leaf=2,
    min_data_per_group=2,
    # Fixed seed and quiet logging.
    random_state=100,
    verbose=-1,
)

Train model.

In [6]:
# Options common to both cases. The categorical settings are added only when
# the data generator reported categorical columns, so the purely continuous
# case keeps LightGBM's defaults.
dataset_kwargs = {'params': {'verbose': -1}}
train_kwargs = {}
if categorical_var_index:
    # Keep the raw data around because the categorical features are passed
    # again to lgb.train() below.
    dataset_kwargs.update(categorical_feature=categorical_var_index,
                          free_raw_data=False)
    train_kwargs['categorical_feature'] = categorical_var_index

train_data = lgb.Dataset(data['X'], label=data['y'], **dataset_kwargs)

# `verbose_eval` is intentionally not passed: no validation sets are supplied,
# so there is no evaluation output to silence, and the keyword was removed in
# LightGBM 4.0 (passing it raises a TypeError there).
model = lgb.train(FIXED_PARAMS, train_data, **train_kwargs)



In [7]:
# Number of input features of the trained booster; reused below to size the ONNX input.
model.num_feature()

4

ONNX needs to know the number of features and their type.
Notice that we specify that all variables are continuous since the ONNX exporter does not support categorical variables.
We will specify which variables are categorical using Pyomo.

In [8]:
# Declare the single ONNX input: a float tensor whose first dimension is left
# as None and whose second dimension has one column per model feature.
float_input_shape = [None, model.num_feature()]
initial_type = [('float_input', FloatTensorType(float_input_shape))]

# Export the trained LightGBM booster to ONNX.
onnx_model = convert(
    model,
    initial_types=initial_type,
    target_opset=13,
)

The maximum opset needed by this model is only 8.


Write ONNX model to a file so that it can be inspected using a tool like [Netron](https://netron.app/).

In [9]:
# delete=False keeps the file on disk after the block exits so it can be
# opened in an external viewer; the printed path tells the reader where it is.
with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as onnx_file:
    serialized_model = onnx_model.SerializeToString()
    onnx_file.write(serialized_model)
    print(f'Onnx model written to {onnx_file.name}')

Onnx model written to /run/user/1000/tmpjk44q3bp.onnx


Create Pyomo model. Bounds for continuous and categorical variables are specified by the user.

In [10]:
m = pe.ConcreteModel()

# One entry per model input, in feature order: three continuous features
# bounded in [-2, 2], followed by the categorical feature (index 3) in [0, 1].
input_bounds = [(-2, 2)] * 3 + [(0, 1)]

# Intended Pyomo domain of each input, in the same order as the bounds.
# NOTE(review): `input_domain` is not referenced by any later cell visible
# here, and the solved inputs are all printed with Domain "Reals" -- confirm
# whether it is meant to be applied to the input variables.
input_domain = [pe.Reals] * 3 + [pe.Integers]

In [11]:
from omlt.block import OmltBlock
from omlt.gbt import BigMFormulation, GradientBoostedTreeModel

In [12]:
# Wrap the ONNX model together with the user-supplied input bounds and choose
# the big-M encoding for it.
gbt_model = GradientBoostedTreeModel(onnx_model, input_bounds=input_bounds)
formulation = BigMFormulation(gbt_model)

# Attach an OMLT block to the Pyomo model and let the formulation populate it.
m.gbt = OmltBlock()
m.gbt.build_formulation(formulation)

# Optimize over the tree ensemble's single output.
m.obj = pe.Objective(expr=m.gbt.outputs[0])

In [13]:
# Print every component of the Pyomo model, including the generated `gbt` block.
m.pprint()

1 Objective Declarations
    obj : Size=1, Index=None, Active=True
        Key  : Active : Sense    : Expression
        None :   True : minimize : gbt.outputs[0]

1 Block Declarations
    gbt : Size=1, Index=None, Active=True
        12 Set Declarations
            categorical_index : Size=1, Index=None, Ordered=False
                Key  : Dimen : Domain : Size : Members
                None :    -- :    Any :    0 :      {}
            inputs_set : Size=1, Index=None, Ordered=Insertion
                Key  : Dimen : Domain : Size : Members
                None :     1 :    Any :    4 : {0, 1, 2, 3}
            left_split_index : Size=1, Index=None, Ordered=Insertion
                Key  : Dimen : Domain : Size : Members
                None :     2 :    Any :  106 : {(0, 0), (0, 2), (0, 3), (0, 4), (0, 9), (0, 10), (1, 0), (1, 2), (1, 3), (1, 4), (1, 9), (2, 0), (2, 2), (2, 3), (2, 4), (2, 9), (2, 10), (3, 0), (3, 2), (3, 3), (3, 4), (3, 10), (4, 0), (4, 2), (4, 3), (4, 4), (4, 9), 

In [14]:
# Solve the model with the CBC solver; tee=True streams the solver log into
# the cell output.
solver = pe.SolverFactory('cbc')
# The call is the cell's last expression, so the returned results object is
# displayed after the log.
solver.solve(m, tee=True)

Welcome to the CBC MILP Solver 
Version: 2.9.10 
Build Date: Feb 23 2019 

command line - /home/fra/.nix-profile/bin/cbc -printingOptions all -import /run/user/1000/tmp__g_4rkp.pyomo.lp -stat=1 -solve -solu /run/user/1000/tmp__g_4rkp.pyomo.soln (default strategy 1)
Option for printingOptions changed from normal to all
Presolve 277 (-59) rows, 164 (-36) columns and 761 (-367) elements
Statistics for presolved model
Original problem has 35 integers (35 of which binary)
Presolved problem has 35 integers (35 of which binary)
==== 38 zero objective 127 different
==== absolute objective values 127 different
==== for integers 35 zero objective 1 different
35 variables have objective of 0
==== for integers absolute objective values 1 different
35 variables have objective of 0
===== end objective counts


Problem has 277 rows, 164 columns (126 with objective) and 761 elements
Column breakdown:
126 of type 0.0->inf, 0 of type 0.0->up, 0 of type lo->inf, 
3 of type lo->up, 0 of type free, 0 of ty

{'Problem': [{'Name': 'unknown', 'Lower bound': -12.67366272, 'Upper bound': -12.67366272, 'Number of objectives': 1, 'Number of constraints': 277, 'Number of variables': 164, 'Number of binary variables': 35, 'Number of integer variables': 35, 'Number of nonzeros': 126, 'Sense': 'minimize'}], 'Solver': [{'Status': 'ok', 'User time': -1.0, 'System time': 0.02, 'Wallclock time': 0.02, 'Termination condition': 'optimal', 'Termination message': 'Model was solved to optimality (subject to tolerances), and an optimal solution is available.', 'Statistics': {'Branch and bound': {'Number of bounded subproblems': 0, 'Number of created subproblems': 0}, 'Black box': {'Number of iterations': 0}}, 'Error rc': 0, 'Time': 0.030335664749145508}], 'Solution': [OrderedDict([('number of solutions', 0), ('number of solutions displayed', 0)])]}

In [15]:
# Show the bounds and solved values of the model's input variables.
m.gbt.inputs.pprint()

inputs : Size=4, Index=gbt.inputs_set
    Key : Lower : Value     : Upper : Fixed : Stale : Domain
      0 :    -2 : 1.9116234 :     2 : False : False :  Reals
      1 :    -2 : 1.4799949 :     2 : False : False :  Reals
      2 :    -2 : 1.2360773 :     2 : False : False :  Reals
      3 :     0 :         0 :     1 : False :  True :  Reals
