In [12]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [13]:
# Fetch MNIST as NumPy arrays (as_frame=False) and cast the labels to uint8.
mnist = datasets.fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target.astype(np.uint8)  # Convert y to integer

n_samples = 1000  # Here we can change the number of samples to see the result

# Materialise the prefix subset once, so the indices produced by the splitter
# are applied to the very arrays they were computed from (instead of relying on
# subset offsets also being valid offsets into the full X / y).
X_subset, y_subset = X[:n_samples], y[:n_samples]

# Single stratified shuffle split: 5% of the subset is held out for testing.
# n_splits=1, so take the one (train, test) index pair directly.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.05, random_state=42)
train_idx, test_idx = next(split.split(X_subset, y_subset))
X_train, X_test = X_subset[train_idx], X_subset[test_idx]
y_train, y_test = y_subset[train_idx], y_subset[test_idx]

# Scale the data for better SVM performance.
# The scaler is fitted on the training rows only and then reused for the test
# rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# RBF-kernel SVM. Note: scikit-learn's SVC trains multiclass problems with a
# one-vs-one scheme internally (not one-vs-all); wrap the estimator in
# sklearn.multiclass.OneVsRestClassifier if one-vs-rest training is required.
svm_clf = SVC(kernel='rbf', C=10, gamma=0.001)  # Tune C and gamma for better performance
svm_clf.fit(X_train, y_train)

# Predict on test set
y_pred = svm_clf.predict(X_test)

# Evaluate accuracy on the held-out rows
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.9800


In [14]:
# Candidate hyper-parameters for the RBF SVM: 3 values of C x 2 values of gamma.
param_grid = dict(C=[1, 10, 100], gamma=[0.01, 0.001])

# Use a subset for speed: at most the first 5000 training rows take part in the
# search. Slicing past the end is harmless, so a smaller training set is used
# in full.
# NOTE(review): X_train was already standardised on all training rows before
# this cell, so the CV folds share scaling statistics.
X_search, y_search = X_train[:5000], y_train[:5000]

# Exhaustive 3-fold cross-validated search, parallelised over all cores.
rbf_svm = SVC(kernel='rbf')
grid_search = GridSearchCV(estimator=rbf_svm, param_grid=param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_search, y_search)

print("Best parameters:", grid_search.best_params_)

Best parameters: {'C': 10, 'gamma': 0.001}
