In [1]:
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.model_selection import train_test_split
from dm_utils.hom import HOM
import warnings
warnings.filterwarnings("ignore")



In [2]:
x, y = load_breast_cancer(return_X_y=True, as_frame=True)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

## XGBClassifier

In [3]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

xgb = XGBClassifier()
xgb.fit(xtrain, ytrain)
accuracy_score(xgb.predict(xtest), ytest)

0.956140350877193

## LGBMClassifier

In [4]:
from lightgbm import LGBMClassifier

lgb = LGBMClassifier(verbosity=-1)
lgb.fit(xtrain, ytrain)
accuracy_score(lgb.predict(xtest), ytest)

0.9649122807017544

## CatBoostClassifier

In [5]:
from catboost import CatBoostClassifier

cb = CatBoostClassifier()
cb.fit(xtrain, ytrain, verbose_eval=200)
accuracy_score(cb.predict(xtest), ytest)

Learning rate set to 0.00736
0:	learn: 0.6812707	total: 47.7ms	remaining: 47.6s
200:	learn: 0.0860662	total: 209ms	remaining: 830ms
400:	learn: 0.0366651	total: 367ms	remaining: 549ms
600:	learn: 0.0208127	total: 524ms	remaining: 348ms
800:	learn: 0.0132461	total: 683ms	remaining: 170ms
999:	learn: 0.0089993	total: 844ms	remaining: 0us


0.9736842105263158

## dm_utils

xgb

In [6]:
hom = HOM(task='cls', model='cb')
hom.fit(xtrain, ytrain, xtrain, ytrain, record_time=True)

[32m[INFO] training begin.[0m
[32m[INFO] Model 1 / 1 CatBoost training begin.[0m
0:	test: 0.9667315	best: 0.9667315 (0)	total: 1.64ms	remaining: 1.64s
100:	test: 1.0000000	best: 1.0000000 (78)	total: 105ms	remaining: 932ms
200:	test: 1.0000000	best: 1.0000000 (78)	total: 208ms	remaining: 827ms
Stopped by overfitting detector  (200 iterations wait)

bestTest = 1
bestIteration = 78

Shrink model to first 79 iterations.
[32m[INFO] Model 1 / 1 CatBoost training finish, cost time 0.316 s.[0m
[36m[SUCEESS] 1 / 1 model validation scores: {'acc': 0.9934065934065934, 'model': 'CatBoost'}[0m
[32m[INFO] train finish, cost time 0.316 s.[0m
[36m[SUCEESS] total 1 model validation scores:{'acc': 0.9934065934065934, 'model': 'CatBoost'}[0m


Unnamed: 0,acc,model
model0,0.993407,CatBoost
all,0.993407,CatBoost


In [7]:
accuracy_score(hom.predict(xtest) > 0.5, ytest)

0.9649122807017544

In [8]:
hom.feature_importance()

Unnamed: 0,feature,importance
0,worst perimeter,11.494139
1,mean concave points,10.742731
2,worst texture,10.619877
3,worst concave points,10.525718
4,worst radius,10.232319
5,worst area,7.39228
6,area error,3.593617
7,mean texture,3.494309
8,worst smoothness,3.314348
9,mean concavity,3.241128


xgb, lgb, cb

In [9]:
hom = HOM(task='cls', model=['xgb', 'lgb', 'cb'])
hom.fit(xtrain, ytrain, xtrain, ytrain, record_time=True)

[32m[INFO] training begin.[0m
[32m[INFO] Model 1 / 3 XGBoost training begin.[0m
[0]	valid-auc:0.95974
[100]	valid-auc:0.99611
[200]	valid-auc:0.99746
[300]	valid-auc:0.99822
[400]	valid-auc:0.99837
[500]	valid-auc:0.99880
[600]	valid-auc:0.99894
[700]	valid-auc:0.99919
[800]	valid-auc:0.99926
[900]	valid-auc:0.99934
[999]	valid-auc:0.99938
[32m[INFO] Model 1 / 3 XGBoost training finish, cost time 0.349 s.[0m
[36m[SUCEESS] 1 / 3 model validation scores: {'acc': 0.9934065934065934, 'model': 'XGBoost'}[0m
[32m[INFO] Model 2 / 3 LightGBM training begin.[0m
Training until validation scores don't improve for 200 rounds
[100]	valid_0's auc: 0.995924	valid_0's binary_logloss: 0.176535	valid_0's binary_error: 0.021978
[200]	valid_0's auc: 0.998345	valid_0's binary_logloss: 0.091842	valid_0's binary_error: 0.010989
[300]	valid_0's auc: 0.999255	valid_0's binary_logloss: 0.0630309	valid_0's binary_error: 0.00879121
[400]	valid_0's auc: 0.99971	valid_0's binary_logloss: 0.0495869	valid_0

Unnamed: 0,acc,model
model0,0.993407,XGBoost
model1,0.991209,LightGBM
model2,0.993407,CatBoost
all,0.993407,"CatBoost,LightGBM,XGBoost"


In [10]:
accuracy_score(hom.predict(xtest) > 0.5, ytest)

0.9736842105263158

In [11]:
print(hom.feature_importances())
hom.feature_importance(0)

[                    feature importance
0           worst perimeter  20.561209
1       mean concave points  20.401163
2      worst concave points  17.678415
3              worst radius   8.741088
4                worst area   5.612785
5         worst compactness   5.367196
6           worst concavity   2.798726
7             worst texture   1.984681
8                area error   1.644873
9                 mean area   1.518102
10             mean texture   1.489551
11              mean radius   1.475256
12   mean fractal dimension   0.988325
13           mean concavity   0.922023
14           worst symmetry   0.855692
15         mean compactness   0.765597
16         worst smoothness   0.758614
17          perimeter error   0.732623
18             radius error   0.725276
19          mean smoothness   0.642015
20          concavity error   0.609813
21        compactness error   0.602667
22           symmetry error   0.510693
23  worst fractal dimension   0.468414
24     concave points er

Unnamed: 0,feature,importance
0,worst perimeter,20.561209
1,mean concave points,20.401163
2,worst concave points,17.678415
3,worst radius,8.741088
4,worst area,5.612785
5,worst compactness,5.367196
6,worst concavity,2.798726
7,worst texture,1.984681
8,area error,1.644873
9,mean area,1.518102
