In [248]:
import time
import lightgbm as lgb
from sklearn.datasets import make_regression

## Test 1

In [256]:
# Shared hyper-parameters for Test 1, keyed by LightGBM's native parameter
# names/aliases. The scikit-learn wrapper cell reads individual entries from
# this dict and the native `train` cell receives the dict as-is.
model_params = dict(
    # boosting setup
    boosting_type="gbdt",
    objective="quantile",
    num_trees=300,
    learning_rate=0.2,
    # tree shape
    max_depth=8,
    min_data_in_leaf=50,
    max_leaves=128,
    # row / column sampling (1 and 0 mean "no sampling")
    bagging_fraction=1,
    bagging_freq=0,
    feature_fraction=1,
    # regularisation
    lambda_l1=0.0,
    lambda_l2=0.001,
    min_child_weight=1e-3,
    # quantile level for the 'quantile' objective
    alpha=0.5,
    # histogram resolution and threading
    max_bin=30,
    n_jobs=4,
)

In [257]:
# Build the scikit-learn wrapper from `model_params`, translating LightGBM's
# native parameter names to the wrapper's constructor arguments. Every value is
# read from the dict so this estimator and the native `train` call share one
# source of truth.
model = lgb.LGBMRegressor(
    objective=model_params.get('objective', 'quantile'),
    alpha=model_params.get('alpha', 0.5),
    boosting_type=model_params.get('boosting_type', 'gbdt'),
    n_estimators=model_params.get('num_trees', 100),
    learning_rate=model_params.get('learning_rate', 0.1),
    max_depth=model_params.get('max_depth', -1),
    min_child_samples=model_params.get('min_data_in_leaf', 20),
    num_leaves=model_params.get('max_leaves', 31),
    subsample=model_params.get('bagging_fraction', 1.0),
    # bagging frequency is an integer count of iterations, so the fallback is 0, not 0.0
    subsample_freq=model_params.get('bagging_freq', 0),
    colsample_bytree=model_params.get('feature_fraction', 1.0),
    reg_alpha=model_params.get('lambda_l1', 0.0),
    reg_lambda=model_params.get('lambda_l2', 0.0),
    # Previously looked up the misspelled key "min_child_weightt" (always None)
    # and passed it under the native alias `min_sum_hessian_in_leaf`; use the
    # wrapper's own argument and the real key instead.
    min_child_weight=model_params.get('min_child_weight', 1e-3),
    random_state=111,
    n_jobs=model_params['n_jobs'],
    # max_bin is not a named constructor argument; it is forwarded via **kwargs
    max_bin=model_params['max_bin'],
)

In [258]:
# Synthetic regression problem: 200 samples, 100 features (10 informative),
# noise-free. A fixed seed keeps the data -- and therefore the timings and the
# prediction comparison -- reproducible across kernel restarts.
n_train = 100
X, y = make_regression(
    n_samples=200,
    n_features=100,
    n_informative=10,
    noise=0.0,
    random_state=111,
)
# First half is used for training, second half is held out.
X_train, y_train = X[:n_train], y[:n_train]
X_test, y_test = X[n_train:], y[n_train:]



In [None]:
# fit: time 500 refits of the scikit-learn wrapper on the training split.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock and can jump.
t0 = time.perf_counter()
for _ in range(500):
    # `verbose` is no longer passed: it is deprecated in LightGBM 3.3 and
    # removed from fit() in 4.0. `sample_weight` and `callbacks` were passed as
    # None, which is their default, so they are omitted too.
    model.fit(X_train, y_train)
print(time.perf_counter() - t0)

In [None]:
# train: time 500 runs of the native training API on the same training split.
# The module is imported as `lgb` at the top of the notebook; the bare name
# `lightgbm` is never bound, so it would raise NameError on a fresh kernel.
# free_raw_data=False keeps the raw arrays attached to the Dataset.
train_set = lgb.Dataset(X_train, label=y_train, free_raw_data=False)
# perf_counter() is the monotonic clock meant for elapsed-time measurements.
t0 = time.perf_counter()
for _ in range(500):
    gbm = lgb.train(model_params, train_set)
print(time.perf_counter() - t0)


In [254]:
# Element-wise gap between the native booster's and the scikit-learn wrapper's
# predictions over the full X; all zeros means the two models agree exactly.
native_preds = gbm.predict(X)
wrapper_preds = model.predict(X)
print(native_preds - wrapper_preds)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]


## Test 2

In [255]:
import numpy as np
import lightgbm as lgbm

# Toy data: 100 evenly spaced points on [0, 10] as a single-feature column
# matrix, with a noiseless quadratic target flattened to a 1-D vector.
xs = np.linspace(0, 10, 100)[:, None]
ys = (xs**2 + 4*xs + 5.2).ravel()

# Note: "min_child_weight": 1e-3 and "min_child_samples": 20 are set explicitly in alg_conf below so that both APIs are given the same leaf constraints.

# Configuration shared by both APIs in Test 2: the scikit-learn wrapper picks
# individual entries out of it, the native `train` call receives a copy of it.
alg_conf = dict(
    num_boost_round=25,
    max_depth=3,
    num_leaves=31,
    learning_rate=0.1,
    boosting_type='gbdt',
    objective='regression_l2',
    early_stopping_rounds=None,
    min_child_weight=1e-3,
    min_child_samples=20,
)

# Build the regressor through the scikit-learn API.
# The leaf constraints go in under the wrapper's own argument names
# (min_child_weight / min_child_samples). Passing the native aliases
# min_sum_hessian_in_leaf / min_data_in_leaf through **kwargs would leave the
# wrapper's defaults for the same settings in place next to them, so each
# constraint would be specified twice.
sk_reg = lgbm.LGBMRegressor(
    boosting_type=alg_conf["boosting_type"],
    num_leaves=alg_conf["num_leaves"],
    n_estimators=alg_conf["num_boost_round"],
    max_depth=alg_conf["max_depth"],
    learning_rate=alg_conf["learning_rate"],
    objective=alg_conf["objective"],
    min_child_weight=alg_conf["min_child_weight"],
    min_child_samples=alg_conf["min_child_samples"],
)

print("Scikit-learn API results")
# Time 1000 refits of the wrapper. perf_counter() is the monotonic,
# high-resolution clock meant for elapsed-time measurements, unlike the
# wall-clock time.time().
t0 = time.perf_counter()
for _ in range(1000):
    sk_reg.fit(xs, ys)
print(time.perf_counter() - t0)

print("Native API results")
# Calling Regressor using native API
train_dataset = lgbm.Dataset(xs, ys)
# num_boost_round and early_stopping_rounds control the training loop rather
# than the booster, so they are stripped from the params dict. The round count
# is passed to train() as its own argument; the early-stopping entry is None
# (disabled) and is simply dropped.
train_params = {
    key: value
    for key, value in alg_conf.items()
    if key not in ("num_boost_round", "early_stopping_rounds")
}
# perf_counter() is the monotonic clock meant for elapsed-time measurements.
t0 = time.perf_counter()
for _ in range(1000):
    # A fresh dict per call, as in the original .copy(), so train() never sees
    # a params object left over from a previous iteration.
    lg_reg = lgbm.train(
        dict(train_params),
        train_dataset,
        num_boost_round=alg_conf["num_boost_round"],
    )
print(time.perf_counter() - t0)

# Element-wise gap between native and scikit-learn predictions; all zeros means
# both APIs produced the same model.
print(lg_reg.predict(xs) - sk_reg.predict(xs))

Scikit-learn API results
2.437354803085327
Native API results




2.773380994796753
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
