In [1]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV


In [2]:
x, y = fetch_openml('mnist_784', return_X_y=True, as_frame=False)

In [3]:
scaler = MinMaxScaler()
x = scaler.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [4]:
param_grid = {'C': [0.1, 1, 10, 100],
              'kernel': ['rbf', 'linear'],
              'gamma': ['scale', 'auto', 0.001, 0.01, 0.1]}

grid_search = GridSearchCV(
    estimator=SVC(verbose=True),
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2
)

grid_search.fit(x_train[:5000], y_train[:5000])

best_model = grid_search.best_estimator_
print("Best parameters found:", grid_search.best_params_)

Fitting 3 folds for each of 40 candidates, totalling 120 fits
[LibSVM]Best parameters found: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [5]:
y_pred = best_model.predict(x_test)
print(classification_report(y_test, y_pred))
print("Final accuracy:", best_model.score(x_test, y_test) * 100)

              precision    recall  f1-score   support

           0       0.97      0.99      0.98      1343
           1       0.97      0.98      0.98      1600
           2       0.95      0.96      0.95      1380
           3       0.94      0.94      0.94      1433
           4       0.95      0.95      0.95      1295
           5       0.95      0.94      0.95      1273
           6       0.97      0.98      0.98      1396
           7       0.97      0.95      0.96      1503
           8       0.96      0.94      0.95      1357
           9       0.94      0.94      0.94      1420

    accuracy                           0.96     14000
   macro avg       0.96      0.96      0.96     14000
weighted avg       0.96      0.96      0.96     14000

Final accuracy: 95.79285714285714


In [6]:
model = SVC(**grid_search.best_params_, verbose=1)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print(classification_report(y_test, y_pred))
print(model.score(x_test, y_test) * 100)


[LibSVM]              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1343
           1       0.99      0.99      0.99      1600
           2       0.97      0.98      0.98      1380
           3       0.98      0.98      0.98      1433
           4       0.98      0.98      0.98      1295
           5       0.98      0.98      0.98      1273
           6       0.99      0.99      0.99      1396
           7       0.98      0.98      0.98      1503
           8       0.98      0.97      0.98      1357
           9       0.98      0.97      0.97      1420

    accuracy                           0.98     14000
   macro avg       0.98      0.98      0.98     14000
weighted avg       0.98      0.98      0.98     14000

98.22142857142858


In [7]:
false_index = np.where(y_test != y_pred)[0]
true_count = len(y_test) - len(false_index)
print("false_index:", false_index)
print("true_count:", true_count)

false_index: [   59   174   241   293   313   385   579   649   699   758   789   830
   895   940  1007  1111  1135  1163  1209  1251  1291  1297  1341  1342
  1480  1495  1516  1546  1596  1611  1658  1711  1716  1826  1882  1918
  1963  1965  1987  2025  2030  2049  2077  2092  2171  2209  2297  2341
  2481  2602  2622  2666  2836  2857  2896  2993  3019  3044  3101  3174
  3207  3234  3312  3390  3403  3564  3569  3581  3661  3712  3731  3767
  3827  4008  4107  4152  4155  4280  4301  4384  4400  4409  4448  4453
  4480  4625  4627  4676  4755  4906  4954  5023  5031  5052  5099  5112
  5152  5179  5217  5253  5297  5299  5351  5438  5453  5473  5478  5515
  5549  5560  5592  5596  5628  5741  5770  5887  6015  6019  6049  6294
  6399  6519  6657  6672  6695  6711  6829  6841  6854  6907  7030  7082
  7086  7088  7094  7289  7356  7400  7477  7619  7633  7741  7876  7893
  7909  7929  7962  8039  8059  8082  8085  8136  8227  8239  8259  8315
  8344  8479  8516  8542  8586  8606  

In [8]:
dataset = np.load("digits.npz")
new_x = dataset["images"]
new_y = dataset["targets"].astype(str)

new_x = new_x.reshape(new_x.shape[0], -1)
new_x = scaler.transform(new_x)

In [9]:
y_new_pred = model.predict(new_x)

print("\n new_pred for new data:")
print("===============================")
for i, (true, pred) in enumerate(zip(new_y, y_new_pred)):
    status = "✅" if true == pred else "❌"
    print(f"{i + 1}: {true} → {pred} {status}")


 new_pred for new data:
1: 0 → 0 ✅
2: 1 → 1 ✅
3: 2 → 2 ✅
4: 3 → 3 ✅
5: 4 → 4 ✅
6: 5 → 3 ❌
7: 6 → 6 ✅
8: 7 → 3 ❌
9: 8 → 2 ❌
10: 9 → 9 ✅


In [10]:
accuracy = np.mean(new_y == y_new_pred) * 100
print(f"accuracy : {accuracy:.2f}%")

accuracy : 70.00%
