In [1]:
#train an SVM classifier on the MNIST dataset. Since SVM classifiers are binary classifiers, you will need to use one-versus-all to classify all 10 digits. You may want to tune the hyperparameters using small validation sets to speed up the process. What accuracy can you reach?

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml

In [3]:
mnist = fetch_openml('mnist_784')
X, y = mnist.data.astype('float64'), mnist.target.astype('int')

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
svm_clf = SVC(kernel='rbf', decision_function_shape='ovr', random_state=42)

In [6]:
svm_clf.fit(X_train, y_train)

SVC(random_state=42)

In [7]:
y_pred = svm_clf.predict(X_test)

In [8]:
accuracy = accuracy_score(y_test, y_pred)

In [9]:
print(f'Accuracy on the test set: {accuracy:.2%}')

Accuracy on the test set: 97.64%


In [10]:
#lets do this on smaller datasets
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

In [13]:
from sklearn.preprocessing import StandardScaler


In [16]:
import numpy as np

In [18]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

In [20]:
param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END ..C=2.5104985146439898, gamma=0.0014303399556714635; total time=   0.2s
[CV] END ..C=2.5104985146439898, gamma=0.0014303399556714635; total time=   0.2s
[CV] END ..C=2.5104985146439898, gamma=0.0014303399556714635; total time=   0.2s
[CV] END ....C=6.256543883215198, gamma=0.002213670507863208; total time=   0.2s
[CV] END ....C=6.256543883215198, gamma=0.002213670507863208; total time=   0.2s
[CV] END ....C=6.256543883215198, gamma=0.002213670507863208; total time=   0.2s
[CV] END ..C=2.4641506076406485, gamma=0.0016414345309586418; total time=   0.2s
[CV] END ..C=2.4641506076406485, gamma=0.0016414345309586418; total time=   0.2s
[CV] END ..C=2.4641506076406485, gamma=0.0016414345309586418; total time=   0.2s
[CV] END ...C=10.032427814708422, gamma=0.022711648083653557; total time=   0.3s
[CV] END ...C=10.032427814708422, gamma=0.022711648083653557; total time=   0.3s
[CV] END ...C=10.032427814708422, gamma=0.022711

RandomizedSearchCV(cv=3, estimator=SVC(random_state=42),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fad3a7a5dc0>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fad3a7b7070>},
                   verbose=2)

In [21]:
rnd_search_cv.best_estimator_

SVC(C=4.554033927907261, gamma=0.0012148345645519132, random_state=42)

In [22]:
rnd_search_cv.best_score_

0.887989786193379

In [23]:
rnd_search_cv.best_estimator_.fit(X_train_scaled, y_train)

SVC(C=4.554033927907261, gamma=0.0012148345645519132, random_state=42)

In [24]:
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

0.9968571428571429