In [36]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)
# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) as integers: comparing the raw version strings is
# lexicographic, so e.g. "0.9" >= "0.20" would wrongly pass.
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None, "unrecognised scikit-learn version string"
assert tuple(map(int, _sklearn_version.groups())) >= (0, 20)
# Common imports
import numpy as np
import os
# to make this notebook's output stable across runs
np.random.seed(42)
# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.metrics import accuracy_score
from scipy.stats import uniform,loguniform
from sklearn.model_selection import RandomizedSearchCV

In [2]:
from sklearn.datasets import fetch_openml

# Fetch the 'mnist_784' dataset (version 1) from OpenML; as_frame=False asks
# for plain arrays instead of a pandas DataFrame.
mnist = fetch_openml(name='mnist_784', version=1, as_frame=False)

# Features and labels are stored under the "data" and "target" keys.
X = mnist["data"]
y = mnist["target"]

In [4]:
# Total number of samples (rows) in the feature matrix
len(X)

70000

In [5]:
# 80% / 10% / 10% contiguous split, in dataset order (no shuffling).
train_size = int(0.8 * X.shape[0])
val_size = int(0.1 * X.shape[0])

# Cut features and labels at the same two indices so rows stay aligned;
# the last chunk (the test set) takes whatever remains.
X_train, X_val, X_test = np.split(X, [train_size, train_size + val_size])
y_train, y_val, y_test = np.split(y, [train_size, train_size + val_size])

# Show the resulting shapes as a quick sanity check
X_train.shape, X_val.shape, X_test.shape

((56000, 784), (7000, 784), (7000, 784))

In [14]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to all three splits.
scaler = StandardScaler().fit(X_train)

X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [15]:
# Support vector classifier with an RBF kernel and regularization strength C=1
svm_clf = SVC(kernel="rbf", C=1)

In [16]:
# Train the classifier on the standardized training split
svm_clf.fit(X=X_train_scaled, y=y_train)

SVC(C=1)

In [17]:
# Predict labels for the standardized validation split
val_prediction = svm_clf.predict(X=X_val_scaled)

In [19]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the validation split.
# The two pieces are joined with a newline, matching a single multi-line string.
print(
    f"Classification report for classifier {svm_clf}:",
    f"{classification_report(y_val, val_prediction)}\n",
    sep="\n",
)

Classification report for classifier SVC(C=1):
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       664
           1       0.98      0.99      0.98       774
           2       0.96      0.97      0.96       711
           3       0.96      0.97      0.97       719
           4       0.98      0.96      0.97       712
           5       0.97      0.95      0.96       644
           6       0.98      0.97      0.97       653
           7       0.93      0.97      0.95       761
           8       0.96      0.96      0.96       677
           9       0.96      0.93      0.95       685

    accuracy                           0.97      7000
   macro avg       0.97      0.97      0.97      7000
weighted avg       0.97      0.97      0.97      7000




In [24]:
# Validation accuracy computed two ways as a cross-check. accuracy_score takes
# (y_true, y_pred) in that order, the same order used for classification_report.
accuracy_score(y_val, val_prediction), np.mean(val_prediction == y_val)

(0.9657142857142857, 0.9657142857142857)

This looks really good: with C=1 and an RBF kernel, the SVM already reaches about 96.6% accuracy on the validation set.