In [107]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

from lightgbm_model import MyLightGBM
from catboost_model import MyCatboost
from xgboost_model import MyXgboost
%run lightgbm_model.py
%run catboost_model.py
%run xgboost_model.py

In [88]:
# Load the Forest Covertype dataset a single time and reuse the returned
# bunch; the original called fetch_covtype() once for the features and again
# for the targets, loading the whole dataset twice.
covtype = datasets.fetch_covtype()

# Keep only the first 3000 rows so the three boosters train quickly.
X = covtype.data[:3000]
y = covtype.target[:3000]

# Hold out 10% for testing. The split is seeded so that the partition, and
# therefore the scores reported at the end of the notebook, are reproducible
# under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=1
)

print(X_train.shape, y_test.shape)
print(np.unique(y_train))  # 7-class classification task

(2700, 54) (300,)
[1 2 3 4 5 6 7]


In [89]:
# Map the raw class labels onto consecutive codes starting at 0.
# OrdinalEncoder works on 2-D input, so each label vector is lifted to a
# single column for the encoder and flattened back to 1-D afterwards.
enc = OrdinalEncoder()
y_train_enc = enc.fit_transform(y_train[:, np.newaxis]).ravel()
# The test labels reuse the mapping fitted on the training labels.
y_test_enc = enc.transform(y_test[:, np.newaxis]).ravel()
print(np.unique(y_train_enc), y_train_enc.shape, sep="\n")

[0. 1. 2. 3. 4. 5. 6.]
(2700,)


In [90]:
# Shuffled 5-fold splitter with a fixed seed; the same object is handed to
# each of the three model wrappers below.
kfold = KFold(
    n_splits=5,
    shuffle=True,
    random_state=1,
)
# Parameter dict passed to MyLightGBM (7-class problem, multi-class log loss).
lgb_params = {
    "objective": "multiclass",
    "num_class": 7,
    "verbose": -4,
    "metric": ("multi_logloss",),
}

# Parameter dict passed to MyCatboost.
cat_params = {
    "loss_function": "MultiClass",
    "eval_metric": "MultiClass",
    "allow_writing_files": False,
    "verbose": True,
    "thread_count": -1,
    "use_best_model": True,
}

# Parameter dict passed to MyXgboost (7-class problem, multi-class log loss).
xgb_params = {
    "objective": "multi:softprob",
    "eval_metric": "mlogloss",
    "verbosity": 0,
    "num_class": 7,
}

### lightgbm测试

In [91]:

# Run the LightGBM wrapper with the shared K-fold splitter. It returns
# train-set predictions, test-set predictions, and a list of fitted models.
lgb_train_pre, lgb_test_pre, lgb_model_list = MyLightGBM(
    X_train_data=X_train,
    y_train_data=y_train_enc,
    X_test_data=X_test,
    kfold=kfold,
    params=lgb_params,
    verbose_eval=100,
    early_stopping_rounds=100,
    feval=None,
    fweight=None,
    categorical_feature="auto",
)

Training fold 1
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.0054711	valid_1's multi_logloss: 0.491722
Did not meet early stopping. Best iteration is:
[100]	training's multi_logloss: 0.0054711	valid_1's multi_logloss: 0.491722
Training fold 2
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.00622442	valid_1's multi_logloss: 0.436128
Did not meet early stopping. Best iteration is:
[100]	training's multi_logloss: 0.00622442	valid_1's multi_logloss: 0.436128
Training fold 3
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.00594682	valid_1's multi_logloss: 0.481893
Did not meet early stopping. Best iteration is:
[100]	training's multi_logloss: 0.00594682	valid_1's multi_logloss: 0.481893
Training fold 4
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.00604186	valid_1's multi_logloss: 0.409178
Did not

In [92]:
# Show the prediction array shapes and the returned model list, one per line.
print(lgb_train_pre.shape, lgb_test_pre.shape, lgb_model_list, sep="\n")

(2700, 7)
(300, 7)
[<lightgbm.basic.Booster object at 0x00000153839B7D60>, <lightgbm.basic.Booster object at 0x00000153839B7D30>, <lightgbm.basic.Booster object at 0x00000153839B7EE0>, <lightgbm.basic.Booster object at 0x00000153839B74C0>, <lightgbm.basic.Booster object at 0x00000153839B77C0>]


### catboost测试

In [93]:
# Run the CatBoost wrapper with the shared K-fold splitter. It returns
# train-set predictions, test-set predictions, and a list of fitted models.
cat_train_pre, cat_test_pre, cat_model_list = MyCatboost(
    X_train_data=X_train,
    y_train_data=y_train_enc,
    X_test_data=X_test,
    kfold=kfold,
    params=cat_params,
    num_class=7,
    early_stopping_rounds=100,
    verbose_eval=200,
    fweight=None,
)

Training fold 1
Learning rate set to 0.109652
0:	learn: 1.7213354	test: 1.7265158	best: 1.7265158 (0)	total: 6.96ms	remaining: 6.95s
200:	learn: 0.2844294	test: 0.4388946	best: 0.4388946 (200)	total: 1.23s	remaining: 4.89s
400:	learn: 0.1731654	test: 0.4005388	best: 0.3998729 (381)	total: 2.44s	remaining: 3.65s
600:	learn: 0.1177499	test: 0.3888427	best: 0.3882681 (588)	total: 3.67s	remaining: 2.43s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.3882680654
bestIteration = 588

Shrink model to first 589 iterations.
Training fold 2
Learning rate set to 0.109652
0:	learn: 1.7174810	test: 1.7236708	best: 1.7236708 (0)	total: 36.4ms	remaining: 36.4s
200:	learn: 0.2807979	test: 0.4340126	best: 0.4340126 (200)	total: 1.26s	remaining: 5.03s
400:	learn: 0.1735725	test: 0.3857660	best: 0.3857660 (400)	total: 2.49s	remaining: 3.72s
600:	learn: 0.1180759	test: 0.3687095	best: 0.3685478 (590)	total: 3.72s	remaining: 2.47s
800:	learn: 0.0872829	test: 0.3661200	best: 0.3660797 (

In [94]:
# Show the prediction array shapes and the returned model list, one per line.
print(cat_train_pre.shape, cat_test_pre.shape, cat_model_list, sep="\n")

(2700, 7)
(300, 7)
[<catboost.core.CatBoost object at 0x00000153EE7F6E80>, <catboost.core.CatBoost object at 0x00000153EE625B20>, <catboost.core.CatBoost object at 0x00000153EDC18A90>, <catboost.core.CatBoost object at 0x00000153EDC18430>, <catboost.core.CatBoost object at 0x00000153EDC182B0>]


### xgboost测试

In [95]:
# Run the XGBoost wrapper with the shared K-fold splitter. It returns
# train-set predictions, test-set predictions, and a list of fitted models.
xgb_train_pre, xgb_test_pre, xgb_model_list = MyXgboost(
    X_train_data=X_train,
    y_train_data=y_train_enc,
    X_test_data=X_test,
    kfold=kfold,
    params=xgb_params,
    early_stopping_rounds=100,
    verbose_eval=200,
    feval=None,
    fweight=None,
)

Training fold 1
[0]	train_-mlogloss:1.34850	val_-mlogloss:1.41306
[9]	train_-mlogloss:0.35535	val_-mlogloss:0.56068
Training fold 2
[0]	train_-mlogloss:1.34961	val_-mlogloss:1.40848
[9]	train_-mlogloss:0.36449	val_-mlogloss:0.57160
Training fold 3
[0]	train_-mlogloss:1.35684	val_-mlogloss:1.43233
[9]	train_-mlogloss:0.35594	val_-mlogloss:0.56430
Training fold 4
[0]	train_-mlogloss:1.34559	val_-mlogloss:1.41565
[9]	train_-mlogloss:0.35403	val_-mlogloss:0.56247
Training fold 5
[0]	train_-mlogloss:1.35737	val_-mlogloss:1.43258
[9]	train_-mlogloss:0.34215	val_-mlogloss:0.58344


In [96]:
# Show the prediction array shapes and the returned model list, one per line.
print(xgb_train_pre.shape, xgb_test_pre.shape, xgb_model_list, sep="\n")

(2700, 7)
(300, 7)
[<xgboost.core.Booster object at 0x00000153839BFD90>, <xgboost.core.Booster object at 0x00000153839BF610>, <xgboost.core.Booster object at 0x00000153839BF460>, <xgboost.core.Booster object at 0x00000153839BF550>, <xgboost.core.Booster object at 0x00000153839BF640>]


In [109]:
# Hold-out accuracy for each booster. Every *_test_pre array has one row per
# test sample and one column per class; the index of the largest column is
# taken as the predicted encoded label and compared with y_test_enc.
test_predictions = {
    "xgb": xgb_test_pre,
    "lgb": lgb_test_pre,
    "cat": cat_test_pre,
}
scores = {
    model_name: accuracy_score(y_test_enc, np.argmax(class_scores, axis=1))
    for model_name, class_scores in test_predictions.items()
}

# Keep the original per-model variables available for any later cells.
xgb_score = scores["xgb"]
lgb_score = scores["lgb"]
cat_score = scores["cat"]

# One consistent "<model> score: <value>" line per model (the original
# "cat score" label was missing its colon).
for model_name, model_score in scores.items():
    print(f"{model_name} score:", model_score)

xgb score: 0.8266666666666667
lgb score: 0.8566666666666667
cat score 0.8466666666666667
