## 数据预处理

In [1]:
from sklearn.datasets import load_digits
digits=load_digits()

In [12]:
print("特征维度：", digits["data"].shape)
print("标签维度：", digits["target"].shape)

特征维度： (1797, 64)
标签维度： (1797,)


In [15]:
import numpy as np
target_distribute = np.unique(digits["target"])
print("标签分布：", target_distribute)
print("标签类别数目：", len(target_distribute))

标签分布： [0 1 2 3 4 5 6 7 8 9]
标签类别数目： 10


In [16]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(digits["data"])
x_scaled = scaler.transform(digits["data"])

In [24]:
print("原始数据的第一个样本：")
print(digits["data"][0])
print("数据标准化后的第一个样本：")
print(x_scaled[0])

原始数据的第一个样本：
[ 0.  0.  5. 13.  9.  1.  0.  0.  0.  0. 13. 15. 10. 15.  5.  0.  0.  3.
 15.  2.  0. 11.  8.  0.  0.  4. 12.  0.  0.  8.  8.  0.  0.  5.  8.  0.
  0.  9.  8.  0.  0.  4. 11.  0.  1. 12.  7.  0.  0.  2. 14.  5. 10. 12.
  0.  0.  0.  0.  6. 13. 10.  0.  0.  0.]
数据标准化后的第一个样本：
[ 0.         -0.33501649 -0.04308102  0.27407152 -0.66447751 -0.84412939
 -0.40972392 -0.12502292 -0.05907756 -0.62400926  0.4829745   0.75962245
 -0.05842586  1.12772113  0.87958306 -0.13043338 -0.04462507  0.11144272
  0.89588044 -0.86066632 -1.14964846  0.51547187  1.90596347 -0.11422184
 -0.03337973  0.48648928  0.46988512 -1.49990136 -1.61406277  0.07639777
  1.54181413 -0.04723238  0.          0.76465553  0.05263019 -1.44763006
 -1.73666443  0.04361588  1.43955804  0.         -0.06134367  0.8105536
  0.63011714 -1.12245711 -1.06623158  0.66096475  0.81845076 -0.08874162
 -0.03543326  0.74211893  1.15065212 -0.86867056  0.11012973  0.53761116
 -0.75743581 -0.20978513 -0.02359646 -0.29908135  0.08671

In [25]:
x = x_scaled
y = digits["target"]

In [26]:
from sklearn.model_selection import train_test_split
# test_size: float or int, default=0.25
# shuffle: bool, default=True
x_train, x_test, y_train, y_test = train_test_split(x, y)

In [27]:
print("将25%的数据集划分给测试集")
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

将25%的数据集划分给测试集
(1347, 64)
(450, 64)
(1347,)
(450,)


## 训练多层感知机

In [30]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(30, 30, 30), activation='logistic', max_iter=1000)
mlp.fit(x_train, y_train)

MLPClassifier(activation='logistic', hidden_layer_sizes=(30, 30, 30),
              max_iter=1000)

In [35]:
from sklearn.metrics import classification_report
predicted = mlp.predict(x_test)
print("预测并观察结果")
print(classification_report(y_test, predicted))

预测并观察结果
              precision    recall  f1-score   support

           0       1.00      0.96      0.98        52
           1       0.87      0.96      0.91        48
           2       0.89      0.92      0.90        36
           3       0.90      0.92      0.91        50
           4       0.89      0.97      0.93        34
           5       0.97      0.94      0.96        36
           6       0.96      1.00      0.98        47
           7       0.90      0.95      0.92        38
           8       0.96      0.88      0.92        50
           9       0.96      0.85      0.90        59

    accuracy                           0.93       450
   macro avg       0.93      0.93      0.93       450
weighted avg       0.93      0.93      0.93       450



In [37]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
if __name__ == '__main__':
    pipeline = Pipeline([
        ('mlp', MLPClassifier(hidden_layer_sizes=(30, 30, 30), max_iter=1000))
    ])
    parameters = {
        'mlp__activation': ('identity','logistic','tanh','relu'),
         'mlp__solver': ('lbfgs','sgd','adam')
    }
    grid_search = GridSearchCV(pipeline, parameters, verbose=1, n_jobs=-1)
    grid_search.fit(x_train, y_train)
    print('最佳效果：%0.3f' % grid_search.best_score_)
    print('最优参数：')
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))

    predictions = grid_search.predict(x_test)
    print(classification_report(y_test, predictions))

Fitting 5 folds for each of 12 candidates, totalling 60 fits
最佳效果：0.966
最优参数：
	mlp__activation: 'relu'
	mlp__solver: 'adam'
              precision    recall  f1-score   support

           0       1.00      0.94      0.97        52
           1       0.94      0.98      0.96        48
           2       0.90      1.00      0.95        36
           3       0.94      0.92      0.93        50
           4       0.92      0.97      0.94        34
           5       0.95      0.97      0.96        36
           6       1.00      1.00      1.00        47
           7       0.97      1.00      0.99        38
           8       0.94      0.90      0.92        50
           9       0.96      0.90      0.93        59

    accuracy                           0.95       450
   macro avg       0.95      0.96      0.95       450
weighted avg       0.95      0.95      0.95       450

