In [None]:
# 导入所需的库
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Build a synthetic binary-classification dataset: 1000 samples with 20
# features, 15 of which are informative.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples as the test set; the remaining 80% are used for
# training. The split is seeded with random_state=42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest (the bagging approach). `fit` returns the estimator itself,
# so construction and training are chained into one expression.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Gradient-boosted trees (the boosting approach), trained on the same split.
gbdt_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out test set with each fitted model.
rf_pred = rf_model.predict(X_test)
gbdt_pred = gbdt_model.predict(X_test)

# Score both sets of predictions against the true test labels.
rf_acc, gbdt_acc = (
    accuracy_score(y_test, predictions)
    for predictions in (rf_pred, gbdt_pred)
)

# Report each accuracy on its own line, rounded to four decimal places.
print(
    f"随机森林（Bagging）准确率: {rf_acc:.4f}",
    f"梯度提升树（Boosting）准确率: {gbdt_acc:.4f}",
    sep="\n",
)

# Draw a bar chart comparing the accuracy of the two models.
labels, accuracies = (
    ['Random Forest', 'Gradient Boosting'],
    [rf_acc, gbdt_acc],
)

plt.bar(x=labels, height=accuracies, color=['blue', 'green'])
plt.title('Comparison of Bagging and Boosting')
plt.ylabel('Accuracy')
plt.show()
