In [15]:
import pandas as pd
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'target']
df = pd.read_csv('data/iris.data', header=None, names=column_names)
print(df.head())

   sepal_length  sepal_width  petal_length  petal_width       target
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


## 数据预处理

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = df.drop(columns='target')
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## 模型对比

In [11]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# SVM模型
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("SVM Classification Report:\n", classification_report(y_test, y_pred_svm))

SVM Accuracy: 0.9777777777777777
SVM Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.91      1.00      0.95        10
 Iris-virginica       1.00      0.94      0.97        17

       accuracy                           0.98        45
      macro avg       0.97      0.98      0.97        45
   weighted avg       0.98      0.98      0.98        45



In [17]:
from sklearn.neural_network import MLPClassifier

# 神经网络模型（线性层）
mlp_model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
mlp_model.fit(X_train, y_train)
y_pred_mlp = mlp_model.predict(X_test)
print("MLP Accuracy:", accuracy_score(y_test, y_pred_mlp))
print("MLP Classification Report:\n", classification_report(y_test, y_pred_mlp))

MLP Accuracy: 0.9777777777777777
MLP Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.91      1.00      0.95        10
 Iris-virginica       1.00      0.94      0.97        17

       accuracy                           0.98        45
      macro avg       0.97      0.98      0.97        45
   weighted avg       0.98      0.98      0.98        45



In [16]:
from sklearn.ensemble import RandomForestClassifier

# 随机森林模型
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))

Random Forest Accuracy: 0.9333333333333333
Random Forest Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.77      1.00      0.87        10
 Iris-virginica       1.00      0.82      0.90        17

       accuracy                           0.93        45
      macro avg       0.92      0.94      0.92        45
   weighted avg       0.95      0.93      0.93        45



In [19]:
import time

start_time = time.time()
svm_model.fit(X_train, y_train)
svm_train_time = time.time() - start_time

start_time = time.time()
mlp_model.fit(X_train, y_train)
mlp_train_time = time.time() - start_time

start_time = time.time()
rf_model.fit(X_train, y_train)
rf_train_time = time.time() - start_time

print(f"SVM Training Time: {svm_train_time:.4f} seconds")
print(f"MLP Training Time: {mlp_train_time:.4f} seconds")
print(f"Random Forest Training Time: {rf_train_time:.4f} seconds")

SVM Training Time: 0.0024 seconds
MLP Training Time: 0.1801 seconds
Random Forest Training Time: 0.0943 seconds


### 随机森林调优

In [21]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

# 定义参数网格
param_grid = {
    'n_estimators': [50, 100, 150, 200],  # 树的数量
    'max_depth': [None, 10, 20, 30],  # 树的最大深度
    'min_samples_split': [2, 5, 10],  # 分割一个节点所需的最小样本数
    'min_samples_leaf': [1, 2, 4],  # 叶节点的最小样本数
    'max_features': ['auto', 'sqrt', 'log2']  # 每棵树考虑的最大特征数量
}

# 随机森林模型
rf_model = RandomForestClassifier(random_state=42)

# 使用GridSearchCV进行超参数调优
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

# 在训练集上拟合模型
grid_search.fit(X_train, y_train)

# 输出最优超参数组合
print("Best Parameters:", grid_search.best_params_)

# 使用最优参数训练后的模型进行预测
y_pred_rf = grid_search.best_estimator_.predict(X_test)

# 输出准确度和分类报告
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))

Fitting 5 folds for each of 432 candidates, totalling 2160 fits
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.0s
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.0s
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.0s
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=150; total time=   0.1s
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=150; total time=   0.0s
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=150; total time=   0.0s
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=150; total time=   0.0s
[CV] END max_depth=None, max_features=auto, min_samples_leaf=1, min_samples_split=2

720 fits failed out of a total of 2160.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
370 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/site-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/site-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
s

Best Parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Random Forest Accuracy: 0.9333333333333333
Random Forest Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.77      1.00      0.87        10
 Iris-virginica       1.00      0.82      0.90        17

       accuracy                           0.93        45
      macro avg       0.92      0.94      0.92        45
   weighted avg       0.95      0.93      0.93        45



In [22]:
# 使用最优参数训练后的模型进行预测
y_pred_rf = grid_search.best_estimator_.predict(X_test)

# 输出准确度和分类报告
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))

Random Forest Accuracy: 0.9333333333333333
Random Forest Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.77      1.00      0.87        10
 Iris-virginica       1.00      0.82      0.90        17

       accuracy                           0.93        45
      macro avg       0.92      0.94      0.92        45
   weighted avg       0.95      0.93      0.93        45



### 加深MLP网络

In [29]:
from sklearn.neural_network import MLPClassifier

# 神经网络模型（线性层）
start_time = time.time()
mlp_model = MLPClassifier(hidden_layer_sizes=(100,100,), max_iter=1000, random_state=42)
mlp_model.fit(X_train, y_train)
mlp_train_time = time.time() - start_time
y_pred_mlp = mlp_model.predict(X_test)
print(f"MLP Training Time: {mlp_train_time:.4f} seconds")
print("MLP Accuracy:", accuracy_score(y_test, y_pred_mlp))
print("MLP Classification Report:\n", classification_report(y_test, y_pred_mlp))

MLP Training Time: 0.0001 seconds
MLP Accuracy: 0.9555555555555556
MLP Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.83      1.00      0.91        10
 Iris-virginica       1.00      0.88      0.94        17

       accuracy                           0.96        45
      macro avg       0.94      0.96      0.95        45
   weighted avg       0.96      0.96      0.96        45



In [34]:

from sklearn.neural_network import MLPClassifier

# 神经网络模型（线性层）
start_time = time.time()
mlp_model = MLPClassifier(hidden_layer_sizes=(200,100,), max_iter=1000, random_state=42)
mlp_model.fit(X_train, y_train)
mlp_train_time = time.time() - start_time
y_pred_mlp = mlp_model.predict(X_test)
print(f"MLP Training Time: {mlp_train_time:.4f} seconds")
print("MLP Accuracy:", accuracy_score(y_test, y_pred_mlp))
print("MLP Classification Report:\n", classification_report(y_test, y_pred_mlp))

MLP Training Time: 19.5017 seconds
MLP Accuracy: 0.9333333333333333
MLP Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.77      1.00      0.87        10
 Iris-virginica       1.00      0.82      0.90        17

       accuracy                           0.93        45
      macro avg       0.92      0.94      0.92        45
   weighted avg       0.95      0.93      0.93        45

