In [1]:
from lightgbm import LGBMRegressor
from lightgbm import early_stopping
from sklearn.datasets import load_iris
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23; prefer the standalone package and keep the vendored copy as a fallback
# for old environments.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib



In [2]:
# Load the iris dataset and pull out the feature matrix and the target vector.
iris = load_iris()
data, target = iris.data, iris.target

In [3]:
# Split into training and test data (80% / 20%).
# The seed is fixed so the split -- and every metric derived from it -- is
# identical on each Restart & Run All; without it the reported RMSE and the
# grid-search winner change from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=42
)

In [4]:
# Model training.
# Early stopping is requested through the `early_stopping` callback instead of
# the `early_stopping_rounds` fit argument: the argument was deprecated and
# then removed from `fit()` in LightGBM 4.0, while the callback is accepted by
# older releases too.
# NOTE(review): the test split is also used as the early-stopping validation
# set, so the RMSE reported further down is measured on data that influenced
# training -- consider carving a separate validation split out of X_train.
gbm = LGBMRegressor(objective='regression', num_leaves=31, learning_rate=0.05, n_estimators=20)
gbm.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l1',
    callbacks=[early_stopping(stopping_rounds=5)],
)

[1]	valid_0's l1: 0.709278	valid_0's l2: 0.676587
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l1: 0.682205	valid_0's l2: 0.623116
[3]	valid_0's l1: 0.656486	valid_0's l2: 0.574855
[4]	valid_0's l1: 0.630536	valid_0's l2: 0.531375
[5]	valid_0's l1: 0.605883	valid_0's l2: 0.492071
[6]	valid_0's l1: 0.583821	valid_0's l2: 0.456292
[7]	valid_0's l1: 0.561641	valid_0's l2: 0.424074
[8]	valid_0's l1: 0.540484	valid_0's l2: 0.394916
[9]	valid_0's l1: 0.520462	valid_0's l2: 0.368568
[10]	valid_0's l1: 0.501363	valid_0's l2: 0.344716
[11]	valid_0's l1: 0.484273	valid_0's l2: 0.322873
[12]	valid_0's l1: 0.467024	valid_0's l2: 0.303288
[13]	valid_0's l1: 0.450697	valid_0's l2: 0.285582
[14]	valid_0's l1: 0.43454	valid_0's l2: 0.268902
[15]	valid_0's l1: 0.419792	valid_0's l2: 0.254391
[16]	valid_0's l1: 0.405182	valid_0's l2: 0.240684
[17]	valid_0's l1: 0.391139	valid_0's l2: 0.225791
[18]	valid_0's l1: 0.378093	valid_0's l2: 0.214482
[19]	valid_0's l1: 0.366372	vali

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.05, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=20, n_jobs=-1, num_leaves=31, objective='regression',
              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [None]:
# Persist the fitted model to disk, then read it straight back so the
# following cells run against the deserialised copy.
model_file = 'loan_model.pkl'
joblib.dump(gbm, model_file)
gbm = joblib.load(model_file)

In [5]:
# Predict on the test split, limiting the ensemble to the iteration recorded
# as best during training.
best_iter = gbm.best_iteration_
y_pred = gbm.predict(X_test, num_iteration=best_iter)

In [6]:
# Model evaluation: root of the mean squared error on the test split.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
print('The rmse of prediction is:', rmse)

# Feature importances, one entry per input feature column.
importances = list(gbm.feature_importances_)
print('Feature importances:', importances)

The rmse of prediction is: 0.44250290621518196
Feature importances: [4, 1, 28, 36]


In [7]:
# Grid search for hyper-parameter tuning over learning rate and tree count.
# NOTE: `gbm` is rebound here -- from this cell on it refers to the fitted
# GridSearchCV object, not the regressor trained above.
estimator = LGBMRegressor(num_leaves=31)
param_grid = dict(
    learning_rate=[0.01, 0.1, 1],
    n_estimators=[20, 40],
)
gbm = GridSearchCV(estimator=estimator, param_grid=param_grid).fit(X_train, y_train)
print('Best parameters found by grid search are:', gbm.best_params_)




Best parameters found by grid search are: {'learning_rate': 0.1, 'n_estimators': 40}
