In [2]:
import datetime
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## 线性回归

In [47]:
# 加载数据
breast = datasets.load_breast_cancer()
data = pd.DataFrame(breast.data,columns = [x.replace(' ','_') for x in breast.feature_names])
data['label']  = breast.target

In [50]:
# 特征工程
dftrain,dftest = train_test_split(data)
categorical_features = ['mean_radius','mean_texture']
lgb_train = lgb.Dataset(dftrain.drop(['label'],axis = 1),label=dftrain['label'],
                        categorical_feature = categorical_features)

lgb_valid = lgb.Dataset(dftest.drop(['label'],axis = 1),label=dftest['label'],
                        categorical_feature = categorical_features,
                        reference=lgb_train)


In [51]:
# 设置参数
boost_round = 20                 
early_stop_rounds = 10
params = {
    'boosting_type': 'gbdt',
    'objective':'binary',
    'metric': ['auc'],
    'num_leaves': 31,   
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

In [52]:
# 训练模型 
results = {}
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round= boost_round,
    valid_sets=(lgb_valid, lgb_train),
    valid_names=('validate','train'),
    early_stopping_rounds=early_stop_rounds,
    evals_result= results
)


[1]	train's auc: 0.987252	validate's auc: 0.969713
Training until validation scores don't improve for 10 rounds
[2]	train's auc: 0.989725	validate's auc: 0.97525
[3]	train's auc: 0.992034	validate's auc: 0.981763
[4]	train's auc: 0.992734	validate's auc: 0.980243
[5]	train's auc: 0.992279	validate's auc: 0.979375
[6]	train's auc: 0.993154	validate's auc: 0.979049
[7]	train's auc: 0.993503	validate's auc: 0.978941
[8]	train's auc: 0.99439	validate's auc: 0.978289
[9]	train's auc: 0.994693	validate's auc: 0.979266
[10]	train's auc: 0.995358	validate's auc: 0.9797
[11]	train's auc: 0.995428	validate's auc: 0.980352
[12]	train's auc: 0.995451	validate's auc: 0.98122
[13]	train's auc: 0.995591	validate's auc: 0.98122
Early stopping, best iteration is:
[3]	train's auc: 0.992034	validate's auc: 0.981763


