In [2]:
!pip install xgboost -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com

Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Collecting xgboost
  Downloading https://mirrors.aliyun.com/pypi/packages/97/ef/05245964011e4fc5aa0d86e2285a41de122ee1c30d69df05ecfd594bd608/xgboost-1.5.2-py3-none-win_amd64.whl (106.6 MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.5.2


In [10]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import xgboost as xgb

# 评价
from sklearn.metrics import precision_score, recall_score

# 导入数据
iris = datasets.load_iris()
data = iris.data
label = iris.target

data1 = pd.DataFrame(data)
# 花萼长宽花瓣长宽
data1.columns = ["sepal_l", "sepal_w", "petal_l", "petal_w"]
print(data1.head())
label1 = pd.DataFrame(label)
label1.columns = ["label"]
print(label1.head())
label1.label.value_counts()

   sepal_l  sepal_w  petal_l  petal_w
0      5.1      3.5      1.4      0.2
1      4.9      3.0      1.4      0.2
2      4.7      3.2      1.3      0.2
3      4.6      3.1      1.5      0.2
4      5.0      3.6      1.4      0.2
   label
0      0
1      0
2      0
3      0
4      0


2    50
1    50
0    50
Name: label, dtype: int64

In [2]:
# 划分数据集
train_x, test_x, train_y, test_y = train_test_split(data1.values,label1.values,test_size=0.3, random_state=42)
print("训练集长度",len(train_x))
print("测试集长度",len(test_x))

训练集长度 105
测试集长度 45


### 直接使用xgboost库

In [12]:
# 转换为DMatrix数据格式
test_data = xgb.DMatrix(test_x, label=test_y)

# 设置参数
# multi: softmax是使用softmax后产生的分类结果，而multi: softprob输出的是概率矩阵

# 参数
xgb_params = {
    "eta": 0.3, # 学习率
    "silent": True, # 输出运动讯息
    "objective": "multi:softprob", # 使用多分类生成概率矩阵格式“multi：softprob”
    "num_class": 3, # 类别数量
    "max_depth": 3, # 深度
}

num_round = 20 # 轮数

# 模型训练
model = xgb.train(xgb_params, xgb.DMatrix(train_x, label=train_y), num_round)
# 模型预测
test_pre = model.predict(test_data)

print(test_pre[:5])

# 选择最大概率的列
test_pre_1 = np.asarray([np.argmax(row) for row in test_pre])
print("test result:", test_pre_1)

# 模型评估
print("test precision:", precision_score(test_y, test_pre_1, average="macro"))
print("test recall:", recall_score(test_y, test_pre_1, average="macro"))

Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[[0.00650657 0.96226174 0.03123167]
 [0.970643   0.02533228 0.00402478]
 [0.0033913  0.00692109 0.9896876 ]
 [0.00654362 0.9677424  0.02571394]
 [0.00615641 0.9104776  0.083366  ]]
test result: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
test precision: 1.0
test recall: 1.0


### sklearn接口形式使用Xgboost

In [13]:
from xgboost import XGBClassifier

model = XGBClassifier(
    learning_rate=0.01, # 学习率
    n_esimators=3000, # 步长
    max_depth=4, # 深度
    objective="binary:logistic",# 二分类 逻辑回归
    seed=27
)

model.fit(train_x, train_y)

# 预测结果
test_pre_2 = model.predict(test_x)
print(test_pre_2)

# 模型评估
print("test precision:", precision_score(test_y, test_pre_2, average="macro"))
print("test recall:", recall_score(test_y, test_pre_2, average="macro"))

Parameters: { "n_esimators" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
test precision: 1.0
test recall: 1.0


  return f(**kwargs)
