In [2]:
# Record the versions of the three gradient-boosting libraries used in this
# benchmark so the reported timings and scores can be tied to an environment.
import xgboost
import lightgbm
import catboost

print('xgboost version:', xgboost.__version__)
print('lightgbm version:', lightgbm.__version__)
print('catboost version:', catboost.__version__)

xgboost version: 1.7.5
lightgbm version: 3.3.5
catboost version: 1.2


In [3]:
from sklearn.datasets import make_regression
# Synthetic regression problem: 2000 samples with 20 features.
# random_state is pinned so that every Restart & Run All benchmarks the
# exact same dataset; without it each run generates different data.
X, y = make_regression(n_samples=2000, n_features=20, random_state=42)

In [4]:
# Sanity check: dimensions of the generated feature matrix.
X.shape

(2000, 20)

In [5]:
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor,AdaBoostRegressor

# One shared seed for every estimator: the sklearn ensembles are stochastic,
# so without it the timings and cross-validation scores below change on each
# run of the notebook.
SEED = 42

# Regressors under comparison, all limited to 100 estimators so that fit and
# predict timings are measured on comparable ensemble sizes.
models = [RandomForestRegressor(n_estimators=100, random_state=SEED),
          ExtraTreesRegressor(n_estimators=100, random_state=SEED),
          AdaBoostRegressor(n_estimators=100, random_state=SEED),
          GradientBoostingRegressor(n_estimators=100, random_state=SEED),
          XGBRegressor(n_estimators=100, objective='reg:squarederror', random_state=SEED),
          # verbose=0 silences CatBoost's per-iteration training log.
          CatBoostRegressor(verbose=0, n_estimators=100, random_state=SEED),
          LGBMRegressor(n_estimators=100, random_state=SEED)]


In [6]:
for model in models:
  print(type(model).__name__)
  %timeit model.fit(X,y)

RandomForestRegressor
5.85 s ± 407 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
ExtraTreesRegressor
2.19 s ± 40.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
AdaBoostRegressor
2.04 s ± 195 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
GradientBoostingRegressor
2.56 s ± 67.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
XGBRegressor
1.82 s ± 345 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
CatBoostRegressor
1.49 s ± 196 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
LGBMRegressor
770 ms ± 147 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
for model in models:
  print(type(model).__name__)
  %timeit model.predict(X)

RandomForestRegressor
89.7 ms ± 1.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
ExtraTreesRegressor
114 ms ± 20.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
AdaBoostRegressor
109 ms ± 5.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
GradientBoostingRegressor
8.3 ms ± 454 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
XGBRegressor
13.9 ms ± 7.06 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
CatBoostRegressor
37 ms ± 4.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
LGBMRegressor
22.2 ms ± 3.62 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
from numpy import mean,std,sqrt
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated RMSE for every regressor.
# The scorer returns negated MSE per fold, so the sign is flipped before
# taking the square root. Mean and standard deviation are reported over folds.
for model in models:
    n_scores = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=10, n_jobs=-1)
    n_scores = sqrt(-n_scores)
    # A single f-string replaces the previous mix of f-string interpolation and
    # %-formatting, which would break on any '%' in the interpolated text.
    print(f'{type(model).__name__} RMSE: {mean(n_scores):.3f} ({std(n_scores):.3f})')

RandomForestRegressor RMSE: 103.672 (5.209)
ExtraTreesRegressor RMSE: 96.221 (5.883)
AdaBoostRegressor RMSE: 103.997 (5.030)
GradientBoostingRegressor RMSE: 67.528 (3.880)
XGBRegressor RMSE: 79.334 (5.688)
CatBoostRegressor RMSE: 35.687 (2.901)
LGBMRegressor RMSE: 59.063 (3.792)


# Clasificación

In [9]:
from sklearn.datasets import load_digits

# Load the digits dataset restricted to two classes (n_class=2), requesting
# pandas objects (as_frame=True) so features and target can be inspected with .head().
dataset = load_digits(n_class=2,as_frame=True)


In [10]:
# Features: the pixel columns divided by 16. (the previewed values then fall
# in [0, 1]); labels are taken from the dataset unchanged.
X = dataset["data"] / 16.
y = dataset["target"]

In [11]:
# Preview the first rows of the feature frame.
X.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,0.3125,0.8125,0.5625,0.0625,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.375,0.8125,0.625,0.0,0.0,0.0
1,0.0,0.0,0.0,0.75,0.8125,0.3125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.6875,1.0,0.625,0.0,0.0
2,0.0,0.0,0.0625,0.5625,0.9375,0.6875,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0625,0.625,0.8125,0.1875,0.0,0.0
3,0.0,0.0,0.0,0.0,0.875,0.8125,0.0625,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0625,0.8125,1.0,0.0625,0.0
4,0.0,0.0,0.1875,0.8125,0.6875,0.4375,0.0,0.0,0.0,0.0,...,0.0625,0.0,0.0,0.0,0.125,0.75,0.8125,0.25,0.0,0.0


In [12]:
# Preview the first target labels.
y.head()

0    0
1    1
2    0
3    1
4    0
Name: target, dtype: int64

In [13]:
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier,AdaBoostClassifier

# One shared seed for every estimator: the sklearn ensembles are stochastic,
# so without it the timings and ROC AUC scores below change on each run.
SEED = 42

# Classifiers under comparison. Every entry now states n_estimators=100
# explicitly (RandomForestClassifier previously relied on its default), so the
# ensemble size is visibly the same for all of them.
models = [RandomForestClassifier(n_estimators=100, random_state=SEED),
          ExtraTreesClassifier(n_estimators=100, random_state=SEED),
          AdaBoostClassifier(n_estimators=100, random_state=SEED),
          GradientBoostingClassifier(n_estimators=100, random_state=SEED),
          XGBClassifier(n_estimators=100, eval_metric='logloss', random_state=SEED),
          # verbose=0 silences CatBoost's per-iteration training log.
          CatBoostClassifier(verbose=0, n_estimators=100, random_state=SEED),
          LGBMClassifier(n_estimators=100, random_state=SEED)]

In [14]:
for model in models:
  print(type(model).__name__)
  %timeit model.fit(X,y)

RandomForestClassifier
711 ms ± 194 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
ExtraTreesClassifier
436 ms ± 31.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
AdaBoostClassifier
778 ms ± 126 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
GradientBoostingClassifier
404 ms ± 43 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
XGBClassifier
217 ms ± 15.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
CatBoostClassifier
430 ms ± 18.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
LGBMClassifier
194 ms ± 116 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
for model in models:
  print(type(model).__name__)
  %timeit model.predict(X)

RandomForestClassifier
40.8 ms ± 1.06 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
ExtraTreesClassifier
41.1 ms ± 505 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
AdaBoostClassifier
72.2 ms ± 7.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
GradientBoostingClassifier
6.62 ms ± 571 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
XGBClassifier
20.8 ms ± 9.22 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
CatBoostClassifier
5.06 ms ± 91.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
LGBMClassifier
5.63 ms ± 758 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [16]:
# 3-fold cross-validated ROC AUC for every classifier; mean and standard
# deviation are reported over the folds.
# Relies on `cross_val_score`, `mean` and `std` having been imported earlier
# in the notebook.
for model in models:
    n_scores = cross_val_score(model, X, y, scoring='roc_auc', cv=3, n_jobs=-1)
    # A single f-string replaces the previous mix of f-string interpolation and
    # %-formatting, which would break on any '%' in the interpolated text.
    print(f'{type(model).__name__} roc_auc: {mean(n_scores):.3f} ({std(n_scores):.3f})')

RandomForestClassifier roc_auc: 1.000 (0.000)
ExtraTreesClassifier roc_auc: 1.000 (0.000)
AdaBoostClassifier roc_auc: 0.989 (0.015)
GradientBoostingClassifier roc_auc: 0.992 (0.012)
XGBClassifier roc_auc: 0.989 (0.015)
CatBoostClassifier roc_auc: 1.000 (0.000)
LGBMClassifier roc_auc: 1.000 (0.000)
