In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import lightgbm as lgb

from lightgbm_model import MyLightGBM
from catboost_model import MyCatboost
from xgboost_model import MyXgboost
%run lightgbm_model.py
%run catboost_model.py
%run xgboost_model.py

In [2]:
X = datasets.fetch_covtype().data[:3000]
y = datasets.fetch_covtype().target[:3000]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

print(X_train.shape, y_test.shape)
print(np.unique(y_train))  # 7分类任务

(2700, 54) (300,)
[1 2 3 4 5 6 7]


In [3]:
enc = OrdinalEncoder()
y_train_enc = enc.fit_transform(y_train.reshape(-1, 1)).reshape(-1, )
y_test_enc = enc.transform(y_test.reshape(-1, 1)).reshape(-1, )
print(np.unique(y_train_enc))
print(y_train_enc.shape)

[0. 1. 2. 3. 4. 5. 6.]
(2700,)


In [4]:
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
es_func = lgb.early_stopping(stopping_rounds=200)
le_func = lgb.log_evaluation(100)

lgb_params = {"objective": "multiclass",
              "num_class": 7,
              "verbose": -4, "metric": ("multi_logloss",)}

cat_params = {"loss_function": "MultiClass",
              "eval_metric": "MultiClass",
              "allow_writing_files": False,
              "verbose": True,
              "thread_count": -1,
              "use_best_model": True
              }

xgb_params = {'objective': 'multi:softprob',
              "eval_metric": 'mlogloss',
              "verbosity": 0,
              'num_class': 7}

### lightgbm测试

In [5]:

lgb_train_pre, lgb_test_pre, lgb_model_list = MyLightGBM(X_train_data=X_train,
                                                         y_train_data=y_train_enc,
                                                         X_test_data=X_test,
                                                         kfold=kfold,
                                                         params=lgb_params,
                                                         callbacks=[es_func, le_func],
                                                         feval=None, fweight=None, categorical_feature="auto")

Training fold 1
Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 0.00631386	valid_1's multi_logloss: 0.410958
[200]	training's multi_logloss: 0.000125528	valid_1's multi_logloss: 0.572702
Early stopping, best iteration is:
[57]	training's multi_logloss: 0.0389058	valid_1's multi_logloss: 0.365149
Training fold 2
[100]	training's multi_logloss: 0.0063706	valid_1's multi_logloss: 0.471962
[200]	training's multi_logloss: 0.000145456	valid_1's multi_logloss: 0.639968
Early stopping, best iteration is:
[57]	training's multi_logloss: 0.0389058	valid_1's multi_logloss: 0.365149
Training fold 3
[100]	training's multi_logloss: 0.00611773	valid_1's multi_logloss: 0.414265
[200]	training's multi_logloss: 0.000132297	valid_1's multi_logloss: 0.552665
Early stopping, best iteration is:
[57]	training's multi_logloss: 0.0389058	valid_1's multi_logloss: 0.365149
Training fold 4
[100]	training's multi_logloss: 0.00556595	valid_1's multi_logloss: 0.591573
[20

In [6]:
print(lgb_train_pre.shape)
print(lgb_test_pre.shape)
print(lgb_model_list)

(2700, 7)
(300, 7)
[<lightgbm.basic.Booster object at 0x0000020F441D48E0>, <lightgbm.basic.Booster object at 0x0000020F066CC9D0>, <lightgbm.basic.Booster object at 0x0000020F066CC670>, <lightgbm.basic.Booster object at 0x0000020F066CCB20>, <lightgbm.basic.Booster object at 0x0000020F066CC790>]


### catboost测试

In [7]:
cat_train_pre, cat_test_pre, cat_model_list = MyCatboost(X_train_data=X_train,
                                                         y_train_data=y_train_enc,
                                                         X_test_data=X_test,
                                                         kfold=kfold,
                                                         params=cat_params,
                                                         num_class=7,
                                                         early_stopping_rounds=100,
                                                         verbose_eval=200,
                                                         fweight=None)

Training fold 1
Learning rate set to 0.109652
0:	learn: 1.7067987	test: 1.7135182	best: 1.7135182 (0)	total: 152ms	remaining: 2m 31s
200:	learn: 0.2907832	test: 0.4345651	best: 0.4345651 (200)	total: 1.63s	remaining: 6.46s
400:	learn: 0.1757640	test: 0.3791633	best: 0.3788905 (398)	total: 3.31s	remaining: 4.94s
600:	learn: 0.1178662	test: 0.3627976	best: 0.3622846 (595)	total: 4.87s	remaining: 3.23s
800:	learn: 0.0846530	test: 0.3559992	best: 0.3555747 (787)	total: 6.32s	remaining: 1.57s
999:	learn: 0.0647781	test: 0.3538373	best: 0.3524478 (933)	total: 7.74s	remaining: 0us

bestTest = 0.3524477958
bestIteration = 933

Shrink model to first 934 iterations.
Training fold 2
Learning rate set to 0.109652
0:	learn: 1.7442836	test: 1.7360204	best: 1.7360204 (0)	total: 7.79ms	remaining: 7.79s
200:	learn: 0.2789115	test: 0.4312189	best: 0.4312189 (200)	total: 1.4s	remaining: 5.55s
400:	learn: 0.1704427	test: 0.3955532	best: 0.3955532 (400)	total: 2.83s	remaining: 4.23s
600:	learn: 0.1154508	t

In [8]:
print(cat_train_pre.shape)
print(cat_test_pre.shape)
print(cat_model_list)

(2700, 7)
(300, 7)
[<catboost.core.CatBoost object at 0x0000020F441D4D60>, <catboost.core.CatBoost object at 0x0000020F441D45B0>, <catboost.core.CatBoost object at 0x0000020F441D4B20>, <catboost.core.CatBoost object at 0x0000020F066ED160>, <catboost.core.CatBoost object at 0x0000020F066ED130>]


### xgboost测试

In [9]:
xgb_train_pre, xgb_test_pre, xgb_model_list = MyXgboost(X_train_data=X_train,
                                                        y_train_data=y_train_enc,
                                                        X_test_data=X_test,
                                                        kfold=kfold,
                                                        params=xgb_params,
                                                        early_stopping_rounds=100,
                                                        verbose_eval=200,
                                                        feval=None,
                                                        fweight=None)

Training fold 1
[0]	train_-mlogloss:1.34951	val_-mlogloss:1.39749
[9]	train_-mlogloss:0.36473	val_-mlogloss:0.55932
Training fold 2
[0]	train_-mlogloss:1.35962	val_-mlogloss:1.40953
[9]	train_-mlogloss:0.36721	val_-mlogloss:0.54961
Training fold 3
[0]	train_-mlogloss:1.35524	val_-mlogloss:1.43400
[9]	train_-mlogloss:0.36018	val_-mlogloss:0.57457
Training fold 4
[0]	train_-mlogloss:1.34930	val_-mlogloss:1.43231
[9]	train_-mlogloss:0.36314	val_-mlogloss:0.60919
Training fold 5
[0]	train_-mlogloss:1.34789	val_-mlogloss:1.39165
[9]	train_-mlogloss:0.37150	val_-mlogloss:0.52809


In [10]:
print(xgb_train_pre.shape)
print(xgb_test_pre.shape)
print(xgb_model_list)

(2700, 7)
(300, 7)
[<xgboost.core.Booster object at 0x0000020F066ED8E0>, <xgboost.core.Booster object at 0x0000020F066EDAF0>, <xgboost.core.Booster object at 0x0000020F066ED580>, <xgboost.core.Booster object at 0x0000020F066EDBE0>, <xgboost.core.Booster object at 0x0000020F066ED550>]


In [11]:
xgb_score = accuracy_score(y_test_enc, np.argmax(xgb_test_pre, axis=1))
lgb_score = accuracy_score(y_test_enc, np.argmax(lgb_test_pre, axis=1))
cat_score = accuracy_score(y_test_enc, np.argmax(cat_test_pre, axis=1))

print("xgb score:", xgb_score)
print("lgb score:", lgb_score)
print("cat score", cat_score)

xgb score: 0.8366666666666667
lgb score: 0.87
cat score 0.8733333333333333
