In [1]:
# 导入
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Figure text in this notebook is Chinese: use the SimHei typeface for
# sans-serif text, and turn off the Unicode minus glyph so negative tick
# labels are drawn with an ASCII hyphen.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
import warnings
# Silences every warning category for the rest of the kernel session.
warnings.filterwarnings("ignore")

In [2]:
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
import joblib
from sklearn.ensemble import GradientBoostingClassifier

In [3]:
# Fault labels; each label ends with its four-digit fault code.
error_list = [
    '物料推送装置故障1001',
    '物料检测装置故障2001',
    '填装装置检测故障4001',
    '填装装置定位故障4002',
    '填装装置填装故障4003',
    '加盖装置定位故障5001',
    '加盖装置加盖故障5002',
    '拧盖装置定位故障6001',
    '拧盖装置拧盖故障6002',
]
# Numeric fault codes, in the same order as error_list, taken from the
# trailing four digits of each label.
error_value = [int(label[-4:]) for label in error_list]

In [4]:
def model(X, y, name=None):
    """Train, evaluate and save a hard-voting ensemble for one target column.

    The data is split 70/30 (shuffled, ``random_state=42``). Features are
    standardised with statistics fitted on the training split only. A
    ``VotingClassifier`` combining a random forest, gradient boosting,
    XGBoost and logistic regression is fitted on the training split.
    Accuracy, the confusion matrix and the classification report for the
    held-out split are printed, and the fitted ensemble is written with
    ``joblib.dump`` to ``<name>.pkl`` (a relative path).

    Parameters
    ----------
    X : feature table/matrix accepted by ``train_test_split`` and
        ``StandardScaler``.
    y : target labels. The XGBoost member is configured with
        ``num_class=2``, so two classes are expected.
    name : optional
        Base name of the saved file. When omitted, ``y.name`` is used, so
        passing a named pandas Series is enough.

    Raises
    ------
    ValueError
        If ``name`` is omitted and ``y`` has no ``name`` set.
    """
    # Resolve the output file name first so a missing name fails before any
    # training work is done.
    if name is None:
        name = getattr(y, 'name', None)
    if name is None:
        raise ValueError(
            "model() needs a name for the saved file: pass name=... or pass "
            "y as a pandas Series whose .name is set"
        )
    ensemble_name = f"{name}.pkl"

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, shuffle=True
    )

    # Feature scaling: fit on the training split only, then apply the same
    # transform to the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Random forest; n_estimators is the number of trees.
    model1 = RandomForestClassifier(n_estimators=100, random_state=42)
    model2 = GradientBoostingClassifier(
        n_estimators=200,      # number of boosting stages (trees)
        learning_rate=0.1,     # shrinkage applied to each stage
        max_depth=15,          # maximum depth of each tree
        min_samples_split=50,  # minimum samples needed to split a node
        min_samples_leaf=10,   # minimum samples required at a leaf
        subsample=0.6,         # fraction of samples used to fit each tree
        random_state=42,       # seed for reproducibility
    )
    model3 = XGBClassifier(
        random_state=120,
        objective='multi:softmax',
        eval_metric=['mlogloss', 'aucpr', 'auc', 'map'],
        num_class=2,
        n_estimators=340,
        reg_lambda=2,
        min_child_weight=0,
        max_depth=6,
        learning_rate=0.1,
    )
    # Logistic regression with L2 regularisation.
    model4 = LogisticRegression(max_iter=1500, penalty='l2', solver='lbfgs')

    # Combine the four models by majority (hard) vote.
    ensemble = VotingClassifier(
        estimators=[
            ('RF', model1),
            ('GBDT', model2),
            ('XGB', model3),
            ('LR', model4),
        ],
        voting='hard',
    )

    # Fit the ensemble on the training split.
    ensemble.fit(X_train, y_train)

    # Predict the held-out split once and reuse the predictions for every
    # metric below.
    y_pred = ensemble.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"整体精度：{accuracy * 100:.2f}%")

    conf_matrix = confusion_matrix(y_test, y_pred)
    print("混淆矩阵：")
    print(conf_matrix)

    # Classification report: precision, recall and f1 per class.
    print("\n分类报告：")
    print(classification_report(y_test, y_pred))

    # Save the trained ensemble to disk.
    # NOTE(review): only the ensemble is pickled; the fitted scaler above is
    # not saved, so code that loads this file must apply the same
    # standardisation to its inputs before calling predict.
    joblib.dump(ensemble, ensemble_name)

    # Report where the model was written.
    print(f"训练好的集成模型已保存为 {ensemble_name}")


In [5]:
# Train and save one ensemble per fault type.
for i in range(len(error_list)):
    fault_name = error_list[i]
    fault_code = error_value[i]

    # Each fault code has its own CSV; its first column is used as the index.
    df = pd.read_csv(f'./data/error/{fault_code}.csv', index_col=0)

    # Features: every column except the last nine, minus the production-line
    # number column.
    # NOTE(review): the nine trailing columns are presumably the per-fault
    # label columns (one per entry in error_list) — confirm against the CSV.
    n = len(df.columns)
    X = df.iloc[:, :n - 9].drop(columns=['生产线编号'])

    # Binary target: 1 where the fault column is non-zero, 0 otherwise.
    # Built as a new Series so the loaded frame is left untouched; the Series
    # keeps the fault column's name.
    y = (df[fault_name] != 0).astype(int)

    print(y.value_counts())
    model(X, y)
    print()

物料推送装置故障1001
1    37008
0    37008
Name: count, dtype: int64
整体精度：100.00%
混淆矩阵：
[[11165     1]
 [    0 11039]]

分类报告：
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11166
           1       1.00      1.00      1.00     11039

    accuracy                           1.00     22205
   macro avg       1.00      1.00      1.00     22205
weighted avg       1.00      1.00      1.00     22205

训练好的集成模型已保存为 物料推送装置故障1001.pkl

物料检测装置故障2001
1    23254
0    23254
Name: count, dtype: int64
整体精度：99.41%
混淆矩阵：
[[6966   23]
 [  59 6905]]

分类报告：
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      6989
           1       1.00      0.99      0.99      6964

    accuracy                           0.99     13953
   macro avg       0.99      0.99      0.99     13953
weighted avg       0.99      0.99      0.99     13953

训练好的集成模型已保存为 物料检测装置故障2001.pkl

填装装置检测故障4001
1    28062
0    28062
Name: count, dtype