## 关于LGB的回归问题

In [1]:
# 生成数据集
import numpy as np
import pandas as pd

## 模拟数据

In [4]:

# Build a reproducible synthetic table: uniform random features plus a uniform random target.
num_inst = 1000
np.random.seed(666)  # fix the global NumPy seed so the draws below repeat on every run

# Draw order matters for reproducibility: the feature matrix first, then the target.
X = np.random.rand(num_inst, 10)
y = np.random.rand(num_inst)  # drawn separately from X, not computed from it

# Feature columns are named f0, f1, ... (one per column of X); the target goes in column 'y'.
df = pd.DataFrame(X, columns=[f'f{j}' for j in range(X.shape[1])]).assign(y=y)
df.head()

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,y
0,0.700437,0.844187,0.676514,0.727858,0.951458,0.012703,0.413588,0.048813,0.099929,0.508066,0.859992
1,0.200248,0.744154,0.192892,0.700845,0.293228,0.774479,0.005109,0.112858,0.110954,0.247668,0.189333
2,0.023236,0.727321,0.340035,0.197503,0.90918,0.978347,0.532803,0.259132,0.583813,0.325691,0.510097
3,0.888899,0.626405,0.818874,0.547345,0.416712,0.743047,0.369596,0.075167,0.775193,0.219409,0.211844
4,0.079342,0.486781,0.153674,0.828465,0.191369,0.270409,0.561034,0.90238,0.851788,0.418082,0.252643


In [5]:
from lightgbm import LGBMRegressor

# Hyperparameters gathered in one place so they are easy to scan and tweak.
lgb_params = dict(
    n_estimators=100,
    learning_rate=0.1,
    num_leaves=31,
    max_depth=-1,
    min_child_samples=20,
    min_child_weight=0,
    subsample=0.8,
    subsample_freq=1,
    colsample_bytree=0.8,
    reg_alpha=0,
    reg_lambda=0,
    random_state=666,
)
regr = LGBMRegressor(**lgb_params)

# Fit on the ten feature columns f0..f9 against the target column 'y'.
# fit() is left as the last expression so the cell displays its return value.
feature_cols = [f'f{i}' for i in range(10)]
regr.fit(df[feature_cols], df['y'])

LGBMRegressor(colsample_bytree=0.8, min_child_weight=0, random_state=666,
              reg_alpha=0, reg_lambda=0, subsample=0.8, subsample_freq=1)

In [7]:
# Show the fitted model's importance score for each feature column passed to fit().
# NOTE(review): presumably these are split counts (LightGBM's default importance_type);
# the integer values in the output are consistent with that — confirm against the docs.
regr.feature_importances_

array([309, 239, 288, 284, 274, 307, 345, 305, 283, 314])

In [9]:
# In-sample predictions: score the same feature columns (f0 through f9) of the same
# rows that were passed to fit().
train_features = df.loc[:, 'f0':'f9']  # label slices include both endpoints
y_hat = regr.predict(train_features)
y_hat

array([0.72432611, 0.21661332, 0.52489857, 0.27776193, 0.21665954,
       0.57009761, 0.32970492, 0.70671295, 0.17507509, 0.55877683,
       0.78996983, 0.1728458 , 0.19594943, 0.62827542, 0.51044912,
       0.31696403, 0.42009564, 0.7284198 , 0.765629  , 0.13466507,
       0.66488375, 0.37286525, 0.75969425, 0.19282119, 0.67755297,
       0.71869675, 0.583167  , 0.82205884, 0.31889712, 0.32234512,
       0.75363454, 0.25490929, 0.35015609, 0.51796785, 0.18500135,
       0.40417773, 0.65328736, 0.71287774, 0.6655001 , 0.70542194,
       0.5926815 , 0.33432967, 0.38685209, 0.48660552, 0.36057655,
       0.63709851, 0.11903205, 0.3833754 , 0.34419348, 0.15560166,
       0.28538587, 0.89743048, 0.81939413, 0.18086644, 0.68139722,
       0.31630776, 0.73862569, 0.48992441, 0.67063442, 0.47285767,
       0.16220837, 0.66231085, 0.24362906, 0.31637006, 0.6132668 ,
       0.72905768, 0.27801378, 0.38736165, 0.24106021, 0.70799828,
       0.50078102, 0.29581977, 0.32886403, 0.80552563, 0.25051

In [18]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Compare the predictions with the true target using three regression metrics.
# Note: y_hat was predicted on the same rows the model was fitted on, so these are
# in-sample scores; they describe fit to the training data, not held-out performance.
r2, mse, mae = (
    metric(y, y_hat)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

# Print one "<label> <value>" line per metric.
for label, value in (('r2:', r2), ('mse:', mse), ('mae:', mae)):
    print(label, value)

r2: 0.8810853167748454
mse: 0.010369871611612543
mae: 0.08369749259384747
