In [6]:
from xgboost import XGBClassifier
from catboost import CatBoostClassifier 
from lightgbm import LGBMClassifier 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, RandomForestClassifier
from sklearn.datasets import load_iris 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, cross_val_score 
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
x, y = make_classification(
    n_samples=5000, 
    n_features= 10, 
    n_classes=3, 
    n_clusters_per_class=1
)

In [3]:
print(x[0:1])
print(y[0:5])

[[ 0.09195131  0.37494643  0.19721973 -0.84497924 -0.5703794  -0.41303475
   1.15518209 -1.15494228 -1.11933229 -0.49463934]]
[1 0 0 0 2]


In [4]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,random_state=42,test_size=.15)

In [15]:
xgbc = XGBClassifier(verbosity=0)
cb = CatBoostClassifier()
lgbmc = LGBMClassifier(verbose=0) 
rfc = RandomForestClassifier(verbose=0)
abc = AdaBoostClassifier()
dtc = DecisionTreeClassifier()

In [16]:
# train models 
xgbc.fit(xtrain,ytrain)
cb.fit(xtrain,ytrain)
lgbmc.fit(xtrain,ytrain)
rfc.fit(xtrain,ytrain)
abc.fit(xtrain,ytrain)
dtc.fit(xtrain,ytrain)



Learning rate set to 0.085187
0:	learn: 1.0013504	total: 2.99ms	remaining: 2.99s
1:	learn: 0.9297763	total: 6.66ms	remaining: 3.32s
2:	learn: 0.8608945	total: 9.55ms	remaining: 3.17s
3:	learn: 0.8072496	total: 12.7ms	remaining: 3.15s
4:	learn: 0.7584378	total: 15.8ms	remaining: 3.14s
5:	learn: 0.7199970	total: 18.7ms	remaining: 3.09s
6:	learn: 0.6847691	total: 22.3ms	remaining: 3.17s
7:	learn: 0.6530315	total: 25.3ms	remaining: 3.14s
8:	learn: 0.6292371	total: 28.1ms	remaining: 3.1s
9:	learn: 0.6052143	total: 30.8ms	remaining: 3.05s
10:	learn: 0.5831846	total: 33.9ms	remaining: 3.05s
11:	learn: 0.5643909	total: 37.2ms	remaining: 3.07s
12:	learn: 0.5468813	total: 41.1ms	remaining: 3.12s
13:	learn: 0.5321105	total: 45.3ms	remaining: 3.19s
14:	learn: 0.5182047	total: 48.3ms	remaining: 3.17s
15:	learn: 0.5060371	total: 51.2ms	remaining: 3.15s
16:	learn: 0.4965362	total: 54.8ms	remaining: 3.17s
17:	learn: 0.4870719	total: 57.8ms	remaining: 3.15s
18:	learn: 0.4773509	total: 60.5ms	remaining:

DecisionTreeClassifier()

In [19]:
# score models on training data
xgbc_score = xgbc.score(xtrain,ytrain)
cb_score = cb.score(xtrain,ytrain)
lgbmc_score = lgbmc.score(xtrain,ytrain)
rfc_score = rfc.score(xtrain,ytrain)
abc_score = abc.score(xtrain,ytrain)
dtc_score = dtc.score(xtrain,ytrain)

In [18]:
# cross validation score
xgbc_cv_score = cross_val_score(xgbc, xtrain,ytrain)
cb_cv_score = cross_val_score(cb, xtrain,ytrain)
lgbmc_cv_score = cross_val_score(lgbmc, xtrain,ytrain)
rfc_cv_score = cross_val_score(rfc, xtrain,ytrain)
abc_cv_scire = cross_val_score(abc, xtrain,ytrain)
dtc_cv_score = cross_val_score(dtc, xtrain,ytrain)



Learning rate set to 0.084223
0:	learn: 1.0039182	total: 4.35ms	remaining: 4.34s
1:	learn: 0.9312630	total: 7.37ms	remaining: 3.68s
2:	learn: 0.8643594	total: 10.2ms	remaining: 3.4s
3:	learn: 0.8093242	total: 14.3ms	remaining: 3.55s
4:	learn: 0.7627957	total: 17.3ms	remaining: 3.44s
5:	learn: 0.7250777	total: 20.1ms	remaining: 3.33s
6:	learn: 0.6904102	total: 23ms	remaining: 3.27s
7:	learn: 0.6605738	total: 27.5ms	remaining: 3.41s
8:	learn: 0.6348128	total: 30.9ms	remaining: 3.41s
9:	learn: 0.6120341	total: 33.8ms	remaining: 3.35s
10:	learn: 0.5917621	total: 36.7ms	remaining: 3.3s
11:	learn: 0.5712585	total: 39.7ms	remaining: 3.27s
12:	learn: 0.5557821	total: 43.4ms	remaining: 3.29s
13:	learn: 0.5402711	total: 46.3ms	remaining: 3.26s
14:	learn: 0.5261800	total: 49.2ms	remaining: 3.23s
15:	learn: 0.5135583	total: 52ms	remaining: 3.2s
16:	learn: 0.5037445	total: 54.7ms	remaining: 3.16s
17:	learn: 0.4937456	total: 58.7ms	remaining: 3.2s
18:	learn: 0.4838656	total: 62.4ms	remaining: 3.22s


In [20]:
# make predictions 
xgbc_pred = xgbc.predict(xtest)
cb_pred = cb.predict(xtest)
lgbmc_pred = lgbmc.predict(xtest)
rfc_pred = rfc.predict(xtest)
abc_pred = abc.predict(xtest)
dtc_pred = dtc.predict(xtest)

In [21]:
xgbc_cm = confusion_matrix(ytest, xgbc_pred)
cb_cm = confusion_matrix(ytest, cb_pred)
lgbmc_cm = confusion_matrix(ytest, lgbmc_pred)
rfc_cm = confusion_matrix(ytest, rfc_pred)
abc_cm = confusion_matrix(ytest, abc_pred)
dtc_cm = confusion_matrix(ytest, dtc_pred)

In [22]:
xgbc_cr = classification_report(ytest, xgbc_pred)
cb_cr = classification_report(ytest, cb_pred)
lgbmc_cr = classification_report(ytest, lgbmc_pred)
rfc_cr = classification_report(ytest, rfc_pred)
abc_cr = classification_report(ytest, abc_pred)
dtc_cr = classification_report(ytest, dtc_pred)

In [25]:
print(f'Training scores\nXGBoost:  {xgbc_score}\nCatBoost: {cb_score}\nLightGBM: {lgbmc_score}\nAdaBoost: {abc_score}\nRandom forest: {rfc_score}\nDecision Tree: {dtc_score}')

Training scores
XGBoost:  0.9997647058823529
CatBoost: 0.9771764705882353
LightGBM: 0.991764705882353
AdaBoost: 0.7967058823529412
Random forest: 1.0
Decision Tree: 1.0


In [27]:
print(f'Confusion matricies\nXGBoost: \n{xgbc_cm}\nCatBoost:\n{cb_cm}\nLightGBM:\n{lgbmc_cm}\nAdaBoost:\n{abc_cm}\nRandom forest:\n{rfc_cm}\nDecision Tree:\n{dtc_cm}')

Confusion matricies
XGBoost: 
[[177  43  17]
 [ 37 209   5]
 [ 12   4 246]]
CatBoost:
[[177  41  19]
 [ 34 212   5]
 [ 14   3 245]]
LightGBM:
[[173  45  19]
 [ 37 209   5]
 [ 16   3 243]]
AdaBoost:
[[148  43  46]
 [ 41 204   6]
 [ 10   4 248]]
Random forest:
[[178  43  16]
 [ 36 210   5]
 [ 15   3 244]]
Decision Tree:
[[152  60  25]
 [ 57 187   7]
 [ 23   7 232]]


In [28]:
print(f'Classification Reports\nXGBoost: \n{xgbc_cr}\nCatBoost:\n{cb_cr}\nLightGBM:\n{lgbmc_cr}\nAdaBoost:\n{abc_cr}\nRandom forest:\n{rfc_cr}\nDecision Tree:\n{dtc_cr}')

Classification Reports
XGBoost: 
              precision    recall  f1-score   support

           0       0.78      0.75      0.76       237
           1       0.82      0.83      0.82       251
           2       0.92      0.94      0.93       262

    accuracy                           0.84       750
   macro avg       0.84      0.84      0.84       750
weighted avg       0.84      0.84      0.84       750

CatBoost:
              precision    recall  f1-score   support

           0       0.79      0.75      0.77       237
           1       0.83      0.84      0.84       251
           2       0.91      0.94      0.92       262

    accuracy                           0.85       750
   macro avg       0.84      0.84      0.84       750
weighted avg       0.84      0.85      0.84       750

LightGBM:
              precision    recall  f1-score   support

           0       0.77      0.73      0.75       237
           1       0.81      0.83      0.82       251
           2       0.9