In [221]:
from lightgbm import LGBMClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import log_loss, make_scorer

In [222]:
X = datasets.fetch_covtype().data[:3000]
y = datasets.fetch_covtype().target[:3000]
X_1, X_test, y_1, y_test = train_test_split(X, y, test_size=0.1)
X_train, X_va, y_train, y_va = train_test_split(X_1, y_1)

print(X_train.shape, y_train.shape)
print(np.unique(y_train))  # 7分类任务

(2025, 54) (2025,)
[1 2 3 4 5 6 7]


In [223]:
enc = OrdinalEncoder()
y_train_enc = enc.fit_transform(y_train.reshape(-1, 1)).reshape(-1, )
y_test_enc = enc.transform(y_test.reshape(-1, 1)).reshape(-1, )
y_va_enc = enc.fit_transform(y_va.reshape(-1, 1)).reshape(-1, )

print(np.unique(y_train_enc))
print(y_train_enc.shape)

[0. 1. 2. 3. 4. 5. 6.]
(2025,)


In [224]:
def my_custom_loss_func(clf, X, y):
    y_pred = clf.predict_proba(X)
    return log_loss(y, y_pred)


model = LGBMClassifier(n_jobs=-1)
param_dist = dict(reg_alpha=np.linspace(0, 1, 20),
                  subsample=np.linspace(0.5, 1.0, 20),
                  max_depth=[1, 2, 3, 4, 5, 6, -1],
                  min_child_samples=[15, 16, 17, 18, 19, 20, 21, 22, 23, 24])

random_search = RandomizedSearchCV(model,  # 需要超参数搜索的取值
                                   param_distributions=param_dist,
                                   # Number of parameter settings that are sampled. n_iter trades off runtime vs quality of the solution.
                                   n_iter=20,
                                   scoring=my_custom_loss_func,
                                   cv=5,
                                   random_state=2,
                                   n_jobs=-1)
random_search.fit(X_train, y_train_enc)

RandomizedSearchCV(cv=5, estimator=LGBMClassifier(), n_iter=20, n_jobs=-1,
                   param_distributions={'max_depth': [1, 2, 3, 4, 5, 6, -1],
                                        'min_child_samples': [15, 16, 17, 18,
                                                              19, 20, 21, 22,
                                                              23, 24],
                                        'reg_alpha': array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ]),
                                        'subsample': array([0.5       , 0.52631579, 0.55263158, 0.57894737, 0.60526316,
       0.63157895, 0.65789474, 0.68421053, 0.71052632, 0.73684211,
       0.76315789, 0.78947368, 0.81578947, 0.84210526, 0.86842105,
       0.89473684, 0.92105263, 0.94736842, 0

In [225]:
random_search.best_params_

{'subsample': 0.8421052631578947,
 'reg_alpha': 0.8421052631578947,
 'min_child_samples': 17,
 'max_depth': 1}

In [226]:
random_search.best_score_

0.647336879736955

In [227]:
random_search.best_estimator_

LGBMClassifier(max_depth=1, min_child_samples=17, reg_alpha=0.8421052631578947,
               subsample=0.8421052631578947)