In [None]:
#XGBoost model 7
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import shap
import xgboost as xgb
from sklearn.metrics import (
    average_precision_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import GridSearchCV

# Positive-class weight handed to XGBoost. It is fixed at the neutral value 1;
# the model below is fit on X_train_balanced, which presumably is already
# re-balanced (TODO confirm). If the model is ever fit on raw, imbalanced data,
# compute it as n_negative / n_positive instead.
scale_pos_weight = 1

# Earlier configuration, kept for reference only (as comments, so it cannot be
# mistaken for live code):
# params_xgb = {
#     'learning_rate': 0.02,
#     'booster': 'gbtree',
#     'objective': 'binary:logistic',
#     'verbosity': 1,
#     'seed': 42,
#     'nthread': -1,
#     'colsample_bytree': 0.6,
#     'subsample': 0.7,
#     'eval_metric': 'logloss',
#     'scale_pos_weight': scale_pos_weight
# }
# param_grid = {
#     'n_estimators': [50, 100, 200, 300],
#     'max_depth': [3, 5, 7, 9],
#     'learning_rate': [0.01, 0.02, 0.05, 0.1]
# }

# Hyper-parameters shared by every grid-search candidate. Keys that also appear
# in param_grid (learning_rate) are overridden per candidate by GridSearchCV.
params_xgb = {
    'learning_rate': 0.01,
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    'verbosity': 1,
    'seed': 42,
    'nthread': -1,
    'colsample_bytree': 0.5,
    'subsample': 0.6,
    'eval_metric': 'logloss',
    'scale_pos_weight': scale_pos_weight,  # adjust to the actual class ratio
    'gamma': 0.1,
    'min_child_weight': 5,
    'reg_alpha': 0.1,
    'reg_lambda': 1.0
}

# Searched hyper-parameters (3 x 3 x 1 = 9 candidates).
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [2, 4, 6],
    'learning_rate': [0.01]
}

# 5-fold grid search, selecting the candidate with the best ROC AUC.
# NOTE(review): the folds are cut from X_train_balanced; if that set contains
# synthetic/over-sampled rows, the CV scores are optimistic -- confirm.
grid_search = GridSearchCV(
    estimator=xgb.XGBClassifier(**params_xgb),
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,
    verbose=1
)

# Fit every candidate, then keep the refitted best estimator.
grid_search.fit(X_train_balanced, y_train_balanced)
XGB_model = grid_search.best_estimator_

# Hard labels and positive-class probabilities on the held-out test set.
y_pred_xgb = XGB_model.predict(X_test)
y_proba_xgb = XGB_model.predict_proba(X_test)[:, 1]

# Threshold-free metrics are computed once and reused by the plots below.
roc_auc_xgb = roc_auc_score(y_test, y_proba_xgb)
ap = average_precision_score(y_test, y_proba_xgb)

print("Best Parameters:", grid_search.best_params_)
print("AUC-ROC:", roc_auc_xgb)
print("Average Precision:", ap)
print("\nClassification Report:\n", classification_report(y_test, y_pred_xgb))

# F1 score of the positive class at the default decision threshold.
f1 = f1_score(y_test, y_pred_xgb)
print("XGBoost  F1 - score:", f1)

# ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_proba_xgb)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc_xgb:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('XGBoost Model ROC Curve')
plt.legend()
plt.show()

# Precision-recall curve
precision, recall, _ = precision_recall_curve(y_test, y_proba_xgb)

plt.figure(figsize=(8, 6))
plt.plot(recall, precision, label=f'AP={ap:.2f}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title(' XGBoost Precision-Recall Curve')
plt.legend()
plt.grid(True)
plt.show()

# Confusion matrix at the default decision threshold
cm = confusion_matrix(y_test, y_pred_xgb)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('XGBoost Model Confusion Matrix')
plt.show()

In [None]:
# SVM model 2

# RBF-kernel support vector classifier fitted on the balanced training set.
SVM_model = SVC(
    class_weight='balanced',  # re-weight classes inversely to their frequency
    probability=True,         # enable predict_proba (needed for AUC-ROC)
    kernel='rbf',             # common non-linear kernel
    random_state=42
)
SVM_model.fit(X_train_balanced, y_train_balanced)

# Hard labels and positive-class probabilities on the test set.
y_pred_SVM = SVM_model.predict(X_test)
y_proba_SVM = SVM_model.predict_proba(X_test)[:, 1]

# Evaluation
print("\nSVM AUC-ROC:", roc_auc_score(y_test, y_proba_SVM))
print("SVM分类报告:\n", classification_report(y_test, y_pred_SVM))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, PR curve, confusion matrix and ROC curve.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_SVM)
# print("SVM  F1 - score:", f1)
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_SVM)
# ap = average_precision_score(y_test, y_proba_SVM)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('SVM Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_SVM)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('SVM Confusion Matrix')
# plt.show()
#
# -- ROC curve
# fpr, tpr, _ = roc_curve(y_test, y_proba_SVM)
# roc_auc = roc_auc_score(y_test, y_proba_SVM)
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'AUC = {roc_auc:.2f}')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('SVM Model ROC Curve')
# plt.legend()
# plt.grid(True)
# plt.show()

In [None]:
#DecisionTree Model 3
# Depth-limited decision tree (can model non-linear relationships).
dt_model = DecisionTreeClassifier(
    class_weight='balanced',
    max_depth=5,            # cap the depth to curb overfitting
    random_state=42
)
dt_model.fit(X_train_balanced, y_train_balanced)
y_pred_dt = dt_model.predict(X_test)

# Evaluation
print("\n决策树分类报告:\n", classification_report(y_test, y_pred_dt))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_dt)
# print("DecisionTree  F1 - score:", f1)
#
# -- Positive-class probabilities
# y_proba_dt = dt_model.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_dt)
# auc = roc_auc_score(y_test, y_proba_dt)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('DecisionTree Model ROC Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_dt)
# ap = average_precision_score(y_test, y_proba_dt)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('DecisionTree Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_dt)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('DecisionTree Model Confusion Matrix')
# plt.show()

In [None]:
#RF model 4
# Search space for the random forest (3 x 3 x 3 = 27 candidates).
# Note: this re-binds the notebook-level names param_grid and grid_search.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10]
}

# 5-fold grid search, selecting the candidate with the best ROC AUC.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1
)

# Run the search on the balanced training set.
grid_search.fit(X_train_balanced, y_train_balanced)

# Best estimator, refitted on the whole training set by GridSearchCV.
RF = grid_search.best_estimator_

# Hard labels and positive-class probabilities on the test set.
y_pred_RF = RF.predict(X_test)
y_proba_RF = RF.predict_proba(X_test)[:, 1]

# Evaluation; zero_division=0 reports 0 for a class with no predicted samples
# rather than emitting a warning.
print("AUC-ROC:", roc_auc_score(y_test, y_proba_RF))
print("\n分类报告（反映真实场景）:\n", classification_report(y_test, y_pred_RF, zero_division=0))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_RF)
# print("RF  F1 - score:", f1)
#
# -- Positive-class probabilities
# y_proba_RF = RF.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_RF)
# auc = roc_auc_score(y_test, y_proba_RF)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('RF Model ROC Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_RF)
# ap = average_precision_score(y_test, y_proba_RF)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('RF Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_RF)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('RF Model Confusion Matrix')
# plt.show()

In [None]:
#AdaBoost model 5
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import numpy as np
import pandas as pd

# AdaBoost over decision stumps (depth-1 trees) as the weak learners.
adaboost = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),  # keyword is `estimator` (formerly `base_estimator`)
    n_estimators=50,
    random_state=42
)

# Fit on the balanced training set.
adaboost.fit(X_train_balanced, y_train_balanced)

# Hard labels on the test set.
# NOTE(review): `y_pred` is a generic notebook-level name that other cells may
# overwrite; the other models use a per-model suffix.
y_pred = adaboost.predict(X_test)

# Classification report
print("分类报告：")
print(classification_report(y_test, y_pred))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred)
# print("AdaBoost  F1 - score:", f1)
#
# -- Positive-class probabilities
# y_proba_adaboost = adaboost.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_adaboost)
# auc = roc_auc_score(y_test, y_proba_adaboost)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('AdaBoost model (ROC) Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_adaboost)
# ap = average_precision_score(y_test, y_proba_adaboost)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('AdaBoost Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix (uses this model's predictions; an earlier draft passed
# -- y_pred_cart, i.e. the CART model's predictions, by mistake)
# cm = confusion_matrix(y_test, y_pred)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('AdaBoost Model Confusion Matrix')
# plt.show()

In [None]:
#GBDT model 6
# Gradient-boosted decision trees (GBDT) with 100 depth-3 trees.
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=3,
    random_state=42
)
gb_model.fit(X_train_balanced, y_train_balanced)
y_pred_gb = gb_model.predict(X_test)
print("\n梯度提升树分类报告:\n", classification_report(y_test, y_pred_gb))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_gb)
# print("GBDT  F1 - score:", f1)
#
# -- Positive-class probabilities
# y_proba_gb = gb_model.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_gb)
# auc = roc_auc_score(y_test, y_proba_gb)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('GBDT Model ROC Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_gb)
# ap = average_precision_score(y_test, y_proba_gb)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('GBDT Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_gb)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('GBDT Model Confusion Matrix')
# plt.show()

In [None]:
# Logistic regression --- LR model 1
LR_model = LogisticRegression(
    class_weight='balanced',  # re-weight classes automatically (optional, recommended for imbalanced data)
    max_iter=1000,            # raised iteration cap so the solver can converge
    random_state=42
)
LR_model.fit(X_train_balanced, y_train_balanced)

# Hard labels and positive-class probabilities on the test set.
LR_y_pred = LR_model.predict(X_test)
LR_y_proba = LR_model.predict_proba(X_test)[:, 1]

# Evaluation
print("\n逻辑回归 AUC-ROC:", roc_auc_score(y_test, LR_y_proba))
print("逻辑回归分类报告:\n", classification_report(y_test, LR_y_pred))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, PR curve, confusion matrix and ROC curve.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, LR_y_pred)
# print("逻辑回归 F1 - score:", f1)
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, LR_y_proba)
# ap = average_precision_score(y_test, LR_y_proba)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('LR Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, LR_y_pred)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('LR Confusion Matrix')
# plt.show()
#
# -- ROC curve
# fpr, tpr, _ = roc_curve(y_test, LR_y_proba)
# roc_auc = roc_auc_score(y_test, LR_y_proba)
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'AUC = {roc_auc:.2f}')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('LR Model ROC Curve')
# plt.legend()
# plt.grid(True)
# plt.show()

In [None]:
#C4.5 model 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score
import matplotlib.pyplot as plt

# "C4.5-style" tree: criterion="entropy" makes each split maximise information
# gain instead of the default Gini impurity.
# NOTE: this only approximates C4.5. scikit-learn's DecisionTreeClassifier is
# an optimised CART implementation, so it uses plain information gain (not
# C4.5's gain ratio) and always produces binary splits.
c45_model = DecisionTreeClassifier(criterion="entropy", random_state=42)

# Fit on the balanced training set.
c45_model.fit(X_train_balanced, y_train_balanced)

# Hard labels on the test set.
y_pred_c45 = c45_model.predict(X_test)

# Classification report
print("分类报告：")
print(classification_report(y_test, y_pred_c45))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_c45)
# print("C4.5  F1 - score:", f1)
#
# -- Positive-class probabilities (note: X_test, not X_test_discretized)
# y_proba_c45 = c45_model.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_c45)
# auc = roc_auc_score(y_test, y_proba_c45)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('C4.5 Model ROC Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_c45)
# ap = average_precision_score(y_test, y_proba_c45)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('C4.5 Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_c45)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('C4.5 Model Confusion Matrix')
# plt.show()

In [None]:
#Bagging model 11
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.compose import ColumnTransformer
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data preprocessing is the same as for the previous models; this cell assumes
# X_train_balanced and y_train_balanced were already created by earlier cells.

# Base learner: an entropy-criterion decision tree.
base_model = DecisionTreeClassifier(criterion="entropy", random_state=42)

# Bagging ensemble of 100 such trees.
bagging_model = BaggingClassifier(
    estimator=base_model,       # base learner (keyword is `estimator`, formerly `base_estimator`)
    n_estimators=100,           # number of base learners
    random_state=42
)

# Fit on the balanced training set.
bagging_model.fit(X_train_balanced, y_train_balanced)

# Hard labels on the test set.
y_pred_bagging = bagging_model.predict(X_test)

# Classification report
print("分类报告：")
print(classification_report(y_test, y_pred_bagging))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_bagging)
# print("Bagging  F1 - score:", f1)
#
# -- Positive-class probabilities
# y_proba_bagging = bagging_model.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_bagging)
# auc = roc_auc_score(y_test, y_proba_bagging)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Bagging Model ROC Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_bagging)
# ap = average_precision_score(y_test, y_proba_bagging)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('Bagging Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_bagging)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('Bagging Model Confusion Matrix')
# plt.show()
#
# -- Save the figure to a chosen directory (machine-specific absolute path)
# save_path = ("C:\\Users\\lenovo\\毕设-20212133050")
# plt.savefig(save_path+'\\roc_curve.png')

In [None]:
#Perceptron model 3
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report, roc_curve, roc_auc_score, precision_recall_curve, average_precision_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Relies on X_train_balanced, y_train_balanced, X_test and y_test having been
# created by earlier cells.

# Class-weighted perceptron with a fixed seed, fitted on the balanced set.
perceptron_model = Perceptron(class_weight='balanced', random_state=42)
perceptron_model.fit(X_train_balanced, y_train_balanced)

# Hard labels on the test set, followed by the text report.
y_pred_perceptron = perceptron_model.predict(X_test)
print("感知机分类报告:\n", classification_report(y_test, y_pred_perceptron))

# F1 score at the model's own decision boundary.
f1 = f1_score(y_test, y_pred_perceptron)
print("Perceptron  F1 - score:", f1)

# Ranking scores: decision_function stands in for predict_proba here, so these
# are signed scores rather than probabilities.
y_proba_perceptron_test = perceptron_model.decision_function(X_test)
y_proba_perceptron_train = perceptron_model.decision_function(X_train_balanced)

# ROC points and AUC for the test split, then for the train split.
auc_test = roc_auc_score(y_test, y_proba_perceptron_test)
fpr_test, tpr_test, _ = roc_curve(y_test, y_proba_perceptron_test)

auc_train = roc_auc_score(y_train_balanced, y_proba_perceptron_train)
fpr_train, tpr_train, _ = roc_curve(y_train_balanced, y_proba_perceptron_train)

# ROC figure: dashed train curve, solid test curve, chance-level diagonal.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(fpr_train, tpr_train, linestyle='--', label=f'Perceptron Train (AUC = {auc_train:.2f})')
ax.plot(fpr_test, tpr_test, label=f'Perceptron Test (AUC = {auc_test:.2f})')
ax.plot([0, 1], [0, 1], 'k--')
ax.set(
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='Perceptron ROC Curves',
)
ax.legend()
ax.grid(True)
plt.show()

# Precision-recall points and average precision for both splits.
ap_test = average_precision_score(y_test, y_proba_perceptron_test)
precision_test, recall_test, _ = precision_recall_curve(y_test, y_proba_perceptron_test)

ap_train = average_precision_score(y_train_balanced, y_proba_perceptron_train)
precision_train, recall_train, _ = precision_recall_curve(y_train_balanced, y_proba_perceptron_train)

# PR figure: dashed train curve, solid test curve.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(recall_train, precision_train, linestyle='--', label=f'Perceptron Train (AP = {ap_train:.2f})')
ax.plot(recall_test, precision_test, label=f'Perceptron Test (AP = {ap_test:.2f})')
ax.set(
    xlabel='Recall',
    ylabel='Precision',
    title='Perceptron Precision-Recall Curves',
)
ax.legend()
ax.grid(True)
plt.show()

# Confusion-matrix heat map for the test predictions.
cm = confusion_matrix(y_test, y_pred_perceptron)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set(
    xlabel='Predicted Label',
    ylabel='True Label',
    title='Perceptron Confusion Matrix',
)
plt.show()

In [None]:
#Perceptron model 3
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_curve, roc_auc_score
from imblearn.over_sampling import SMOTE
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Class-weighted perceptron with a fixed seed, fitted on the balanced set.
# NOTE(review): this cell repeats the previous Perceptron cell's training with
# identical settings and re-binds the same model/prediction names.
perceptron_model = Perceptron(class_weight='balanced', random_state=42)
perceptron_model.fit(X_train_balanced, y_train_balanced)

# Hard labels on the test set, followed by the text report.
y_pred_perceptron = perceptron_model.predict(X_test)
print("感知机分类报告:\n", classification_report(y_test, y_pred_perceptron))

# F1 score at the model's own decision boundary.
f1 = f1_score(y_test, y_pred_perceptron)
print("Perceptron  F1 - score:", f1)

# Ranking scores: decision_function stands in for predict_proba here, so these
# are signed scores rather than probabilities.
y_proba_perceptron = perceptron_model.decision_function(X_test)

# ROC points and the matching AUC.
auc = roc_auc_score(y_test, y_proba_perceptron)
fpr, tpr, thresholds = roc_curve(y_test, y_proba_perceptron)

# ROC figure with the chance-level diagonal.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
ax.plot([0, 1], [0, 1], 'k--')
ax.set(
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='Perceptron Model ROC Curve',
)
ax.legend()
plt.show()

# Precision-recall points and average precision.
ap = average_precision_score(y_test, y_proba_perceptron)
precision, recall, _ = precision_recall_curve(y_test, y_proba_perceptron)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(recall, precision, label=f'AP={ap:.2f}')
ax.set(
    xlabel='Recall',
    ylabel='Precision',
    title='perceptron Model Precision - Recall Curve',
)
ax.legend()
ax.grid(True)
plt.show()

# Confusion-matrix heat map for the test predictions.
cm = confusion_matrix(y_test, y_pred_perceptron)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set(
    xlabel='Predicted Label',
    ylabel='True Label',
    title='perceptron Confusion Matrix',
)
plt.show()

In [None]:
#CART model--决策树 7
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score

# CART model: a decision tree with library-default settings and a fixed seed.
cart_model = DecisionTreeClassifier(random_state=42)

# Fit on the balanced training set.
cart_model.fit(X_train_balanced, y_train_balanced)

# Hard labels on the test set.
y_pred_cart = cart_model.predict(X_test)

# Classification report
print("分类报告：")
print(classification_report(y_test, y_pred_cart))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_cart)
# print("CART  F1 - score:", f1)
#
# -- Positive-class probabilities
# y_proba_cart = cart_model.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_cart)
# auc = roc_auc_score(y_test, y_proba_cart)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('CART Model ROC Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_cart)
# ap = average_precision_score(y_test, y_proba_cart)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('CART Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_cart)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('CART Model Confusion Matrix')
# plt.show()

In [None]:
#DecisionTree Model 4
# Depth-limited decision tree (can model non-linear relationships).
# NOTE(review): same settings and variable names as the earlier
# "DecisionTree Model 3" cell, so running this cell re-binds dt_model and
# y_pred_dt.
dt_model = DecisionTreeClassifier(
    class_weight='balanced',
    max_depth=5,            # cap the depth to curb overfitting
    random_state=42
)
dt_model.fit(X_train_balanced, y_train_balanced)
y_pred_dt = dt_model.predict(X_test)
print("\n决策树分类报告:\n", classification_report(y_test, y_pred_dt))

# ---------------------------------------------------------------------------
# Disabled on purpose: F1 score, ROC curve, PR curve and confusion matrix.
# Kept as comments, not as a bare string literal, because a string that ends a
# cell is echoed by Jupyter as the cell's output. Un-comment to run.
# ---------------------------------------------------------------------------
# f1 = f1_score(y_test, y_pred_dt)
# print("DecisionTree  F1 - score:", f1)
#
# -- Positive-class probabilities
# y_proba_dt = dt_model.predict_proba(X_test)[:, 1]
#
# -- ROC curve and AUC
# fpr, tpr, thresholds = roc_curve(y_test, y_proba_dt)
# auc = roc_auc_score(y_test, y_proba_dt)
#
# plt.figure(figsize=(8, 6))
# plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('DecisionTree Model ROC Curve')
# plt.legend()
# plt.show()
#
# -- Precision-recall curve
# precision, recall, _ = precision_recall_curve(y_test, y_proba_dt)
# ap = average_precision_score(y_test, y_proba_dt)
#
# plt.figure(figsize=(8, 6))
# plt.plot(recall, precision, label=f'AP={ap:.2f}')
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('DecisionTree Model Precision - Recall Curve')
# plt.legend()
# plt.grid(True)
# plt.show()
#
# -- Confusion matrix
# cm = confusion_matrix(y_test, y_pred_dt)
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('DecisionTree Model Confusion Matrix')
# plt.show()