In [1]:
import numpy as np
import pandas as pd
import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 各种模型
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

# Load the iris data. The file is read with header=None, so the column
# names are assigned explicitly right after loading.
column_names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
data = pd.read_csv("iris.data", header=None)
data.columns = column_names

# Quick sanity check: preview, number of samples and per-class counts.
print("数据前5行：")
display(data.head())

print("样本数：", len(data))
print("类别分布：")
print(data["class"].value_counts())

# Features are the first four columns; the label is the fifth ("class") column.
feature_columns = column_names[:4]
X = data[feature_columns].values
y = data["class"].values


# Train / test split: 20% held out, stratified on the label, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardization: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# SVM model (RBF kernel).
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')

# Time the fit with perf_counter(): it is a monotonic, high-resolution clock
# meant for measuring short durations, whereas time.time() is a wall clock
# that can be too coarse (or jump) for runs this short.
start = time.perf_counter()
svm_model.fit(X_train, y_train)
train_time_svm = time.perf_counter() - start

# Time prediction on the held-out test split the same way.
start = time.perf_counter()
y_pred_svm = svm_model.predict(X_test)
predict_time_svm = time.perf_counter() - start

acc_svm = accuracy_score(y_test, y_pred_svm)

# Report accuracy, both timings (seconds) and the confusion matrix.
print("===== SVM =====")
print("准确率:", acc_svm)
print("训练时间:", train_time_svm)
print("预测时间:", predict_time_svm)
print(confusion_matrix(y_test, y_pred_svm))

# Neural network model: MLP with one hidden layer of 50 units, seeded for
# reproducibility.
mlp_model = MLPClassifier(
    hidden_layer_sizes=(50,),
    activation='relu',
    solver='adam',
    max_iter=500,
    random_state=42
)

# Time the fit with perf_counter(): it is a monotonic, high-resolution clock
# meant for measuring short durations, whereas time.time() is a wall clock
# that can be too coarse (or jump) for runs this short.
start = time.perf_counter()
mlp_model.fit(X_train, y_train)
train_time_mlp = time.perf_counter() - start

# Time prediction on the held-out test split the same way.
start = time.perf_counter()
y_pred_mlp = mlp_model.predict(X_test)
predict_time_mlp = time.perf_counter() - start

acc_mlp = accuracy_score(y_test, y_pred_mlp)

# Report accuracy, both timings (seconds) and the confusion matrix.
print("===== Neural Network (MLP) =====")
print("准确率:", acc_mlp)
print("训练时间:", train_time_mlp)
print("预测时间:", predict_time_mlp)
print(confusion_matrix(y_test, y_pred_mlp))

# Logistic regression model.
log_model = LogisticRegression(max_iter=200)

# Time the fit with perf_counter(): it is a monotonic, high-resolution clock
# meant for measuring short durations, whereas time.time() is a wall clock
# that can be too coarse (or jump) for runs this short.
start = time.perf_counter()
log_model.fit(X_train, y_train)
train_time_log = time.perf_counter() - start

# Time prediction on the held-out test split the same way.
start = time.perf_counter()
y_pred_log = log_model.predict(X_test)
predict_time_log = time.perf_counter() - start

acc_log = accuracy_score(y_test, y_pred_log)

# Report accuracy, both timings (seconds) and the confusion matrix.
print("===== Logistic Regression =====")
print("准确率:", acc_log)
print("训练时间:", train_time_log)
print("预测时间:", predict_time_log)
print(confusion_matrix(y_test, y_pred_log))

# KNN model (k = 5).
knn_model = KNeighborsClassifier(n_neighbors=5)

# Time the fit with perf_counter(): it is a monotonic, high-resolution clock
# meant for measuring short durations, whereas time.time() is a wall clock
# that can be too coarse (or jump) for runs this short.
start = time.perf_counter()
knn_model.fit(X_train, y_train)
train_time_knn = time.perf_counter() - start

# Time prediction on the held-out test split the same way.
start = time.perf_counter()
y_pred_knn = knn_model.predict(X_test)
predict_time_knn = time.perf_counter() - start

acc_knn = accuracy_score(y_test, y_pred_knn)

# Report accuracy, both timings (seconds) and the confusion matrix.
print("===== KNN =====")
print("准确率:", acc_knn)
print("训练时间:", train_time_knn)
print("预测时间:", predict_time_knn)
print(confusion_matrix(y_test, y_pred_knn))

# Random forest model: 100 trees, seeded for reproducibility.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Time the fit with perf_counter(): it is a monotonic, high-resolution clock
# meant for measuring short durations, whereas time.time() is a wall clock
# that can be too coarse (or jump) for runs this short.
start = time.perf_counter()
rf_model.fit(X_train, y_train)
train_time_rf = time.perf_counter() - start

# Time prediction on the held-out test split the same way.
start = time.perf_counter()
y_pred_rf = rf_model.predict(X_test)
predict_time_rf = time.perf_counter() - start

acc_rf = accuracy_score(y_test, y_pred_rf)

# Report accuracy, both timings (seconds) and the confusion matrix.
print("===== Random Forest =====")
print("准确率:", acc_rf)
print("训练时间:", train_time_rf)
print("预测时间:", predict_time_rf)
print(confusion_matrix(y_test, y_pred_rf))

# Gaussian naive Bayes model.
nb_model = GaussianNB()

# Time the fit with perf_counter(): it is a monotonic, high-resolution clock
# meant for measuring short durations, whereas time.time() is a wall clock
# that can be too coarse (or jump) for runs this short.
start = time.perf_counter()
nb_model.fit(X_train, y_train)
train_time_nb = time.perf_counter() - start

# Time prediction on the held-out test split the same way.
start = time.perf_counter()
y_pred_nb = nb_model.predict(X_test)
predict_time_nb = time.perf_counter() - start

acc_nb = accuracy_score(y_test, y_pred_nb)

# Report accuracy, both timings (seconds) and the confusion matrix.
print("===== Naive Bayes =====")
print("准确率:", acc_nb)
print("训练时间:", train_time_nb)
print("预测时间:", predict_time_nb)
print(confusion_matrix(y_test, y_pred_nb))


# One row per model: (name, accuracy, training time, prediction time).
summary_rows = [
    ("SVM", acc_svm, train_time_svm, predict_time_svm),
    ("Neural Network", acc_mlp, train_time_mlp, predict_time_mlp),
    ("Logistic", acc_log, train_time_log, predict_time_log),
    ("KNN", acc_knn, train_time_knn, predict_time_knn),
    ("Random Forest", acc_rf, train_time_rf, predict_time_rf),
    ("Naive Bayes", acc_nb, train_time_nb, predict_time_nb),
]

# Assemble the rows into the side-by-side comparison table and show it.
result_df = pd.DataFrame(
    summary_rows,
    columns=["Model", "Accuracy", "Training Time (s)", "Prediction Time (s)"],
)

print("===== 模型性能与效率对比 =====")
display(result_df)


# Print the classification report (per-class precision / recall / F1) for
# every model, pairing each report title with that model's test predictions.
predictions_by_model = [
    ("SVM", y_pred_svm),
    ("MLP", y_pred_mlp),
    ("Logistic", y_pred_log),
    ("KNN", y_pred_knn),
    ("Random Forest", y_pred_rf),
    ("Naive Bayes", y_pred_nb),
]

for model_label, model_predictions in predictions_by_model:
    # sep="" is deliberate: print()'s default separator would insert a space
    # in front of the report's first (header) line only, pushing the header
    # one column out of alignment with the rows beneath it.
    print(
        f"\n===== 分类报告：{model_label} =====\n",
        classification_report(y_test, model_predictions),
        sep="",
    )



数据前5行：


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


样本数： 150
类别分布：
class
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64
===== SVM =====
准确率: 0.9666666666666667
训练时间: 0.002012968063354492
预测时间: 0.0002067089080810547
[[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]
===== Neural Network (MLP) =====
准确率: 0.9333333333333333
训练时间: 0.04982399940490723
预测时间: 0.0003597736358642578
[[10  0  0]
 [ 0  9  1]
 [ 0  1  9]]
===== Logistic Regression =====
准确率: 0.9333333333333333
训练时间: 0.00783991813659668
预测时间: 9.989738464355469e-05
[[10  0  0]
 [ 0  9  1]
 [ 0  1  9]]
===== KNN =====
准确率: 0.9333333333333333
训练时间: 0.0015873908996582031
预测时间: 0.0008718967437744141
[[10  0  0]
 [ 0 10  0]
 [ 0  2  8]]
===== Random Forest =====
准确率: 0.9
训练时间: 0.0500178337097168
预测时间: 0.0014500617980957031
[[10  0  0]
 [ 0  9  1]
 [ 0  2  8]]
===== Naive Bayes =====
准确率: 0.9666666666666667
训练时间: 0.0002849102020263672
预测时间: 8.487701416015625e-05
[[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]
===== 模型性能与效率对比 =====




Unnamed: 0,Model,Accuracy,Training Time (s),Prediction Time (s)
0,SVM,0.966667,0.002013,0.000207
1,Neural Network,0.933333,0.049824,0.00036
2,Logistic,0.933333,0.00784,0.0001
3,KNN,0.933333,0.001587,0.000872
4,Random Forest,0.9,0.050018,0.00145
5,Naive Bayes,0.966667,0.000285,8.5e-05



===== 分类报告：SVM =====
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      0.90      0.95        10
 Iris-virginica       0.91      1.00      0.95        10

       accuracy                           0.97        30
      macro avg       0.97      0.97      0.97        30
   weighted avg       0.97      0.97      0.97        30


===== 分类报告：MLP =====
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.90      0.90      0.90        10
 Iris-virginica       0.90      0.90      0.90        10

       accuracy                           0.93        30
      macro avg       0.93      0.93      0.93        30
   weighted avg       0.93      0.93      0.93        30


===== 分类报告：Logistic =====
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-ve