#### SVM算法应用于digits/MNIST数据集

In [6]:
# 导入包
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load scikit-learn's bundled handwritten-digits dataset and separate the
# feature matrix (X) from the class labels (y).
digits = datasets.load_digits()
X = digits.data
y = digits.target
# Report the dimensions of the features and of the labels.
print(X.shape)
print(y.shape)
# Optional preview of a single digit image (left disabled):
# import matplotlib.pyplot as plt
# plt.gray()
# plt.matshow(digits.images[0])
# plt.show()


(1797, 64)
(1797,)


In [7]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print("X_train.shape:", X_train.shape)
print("X_test.shape:", X_test.shape)

# Standardise the features. The scaler learns its statistics from the training
# split only and is then applied unchanged to both splits, so nothing from the
# test set influences preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# Build the RBF-kernel SVM classifier and train it on the standardised data
# (fit returns the estimator itself, so the chained call keeps the model).
svm_model = svm.SVC(C=1.0, kernel="rbf", gamma="scale").fit(X_train_std, y_train)

# Predict the held-out samples and score the predictions.
y_pred = svm_model.predict(X_test_std)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1 summary.
print("Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))

# Confusion matrix (left disabled):
# print("Confusion Matrix:")
# print(confusion_matrix(y_test, y_pred))

X_train.shape: (1437, 64)
X_test.shape: (360, 64)
Accuracy: 0.9805555555555555
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      0.97      0.99        34
           4       0.96      1.00      0.98        46
           5       0.96      0.98      0.97        47
           6       0.97      1.00      0.99        35
           7       1.00      0.94      0.97        34
           8       0.97      0.97      0.97        30
           9       0.97      0.95      0.96        40

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360



In [None]:
# Hyper-parameter tuning: 5-fold cross-validated grid search over the RBF SVM.
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Scaling is part of the estimator being searched, so every CV fold fits its
# own StandardScaler on that fold's training portion only. Searching on data
# that was standardised up front would let each validation fold influence the
# preprocessing statistics (data leakage) and bias the CV scores.
svm_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", svm.SVC()),
])

# Parameter grid; the "svc__" prefix routes each entry to the pipeline's SVC step.
param_grid = {
    'svc__C': [0.1, 1, 10, 20, 100],
    'svc__gamma': [1, 0.1, 0.01, 0.001],
    'svc__kernel': ['rbf']
}

# refit=True retrains the best pipeline (scaler + SVC) on the whole training
# split once the search finishes. verbose=1 keeps the log short instead of
# printing one line per individual fit.
grid_search = GridSearchCV(svm_pipeline, param_grid, refit=True, verbose=1, cv=5)

# The pipeline standardises internally, so it is given the raw (unscaled) splits.
grid_search.fit(X_train, y_train)

# Best parameter combination and its mean cross-validated accuracy.
print("Best Parameters:", grid_search.best_params_)
print("Best CV accuracy:", grid_search.best_score_)

# Evaluate the refitted best pipeline on the held-out test set.
y_pred_best = grid_search.predict(X_test)
accuracy_best = accuracy_score(y_test, y_pred_best)
print("Accuracy with best parameters:", accuracy_best)

# Classification report for the tuned model (left disabled):
# print("Classification Report with best parameters:")
# print(classification_report(y_test, y_pred_best))