# Hello MNIST SVM MultiClass Classifier (OvO: One-versus-One)

In [1]:
# Prerequisites
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Report the interpreter and library versions this notebook was run with.
for label, version in (
    ("Python Version: ", sys.version),
    ("Numpy Version: ", np.__version__),
    ("Pandas Version: ", pd.__version__),
):
    print(label, version)

# Show NumPy floats with 3 decimals and without scientific notation so the
# score arrays printed later stay readable.
np.set_printoptions(suppress=True, precision=3)

Python Version:  3.12.7 (tags/v3.12.7:0b05ead, Oct  1 2024, 03:06:41) [MSC v.1941 64 bit (AMD64)]
Numpy Version:  2.2.2
Pandas Version:  2.2.3


### Load Data

In [2]:
from sklearn.datasets import fetch_openml

# Download MNIST from OpenML.
# - version=1 pins the dataset so a re-run does not depend on which version
#   OpenML currently marks as "active".
# - as_frame=False returns NumPy arrays rather than a pandas DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
print("Type: ", type(mnist))

Type:  <class 'sklearn.utils._bunch.Bunch'>


Create Train and Test Set

In [3]:
# Pull the feature matrix and the label vector out of the loaded dataset.
X = mnist.data
y = mnist.target

# Report the dimensions of both arrays.
for label, array in (("Shape of X: ", X), ("Shape of y: ", y)):
    print(label, array.shape)

Shape of X:  (70000, 784)
Shape of y:  (70000,)


In [4]:
# Positional split: the first TRAIN_SIZE samples become the training set and
# everything after them becomes the test set.
TRAIN_SIZE = 60000
X_train, y_train = X[:TRAIN_SIZE], y[:TRAIN_SIZE]
X_test, y_test = X[TRAIN_SIZE:], y[TRAIN_SIZE:]

### Train SVM Classifier
Note: SVC is a binary classifier. For a multiclass problem, Scikit-Learn automatically trains multiple binary classifiers using the one-versus-one (OvO) strategy: one classifier per pair of classes. For the 10 MNIST digits this means 10 × 9 / 2 = 45 classifiers.

In [5]:
from sklearn.svm import SVC

# Only the first 2,000 training samples are used for fitting
# (presumably to keep training time short — TODO confirm).
fit_subset = slice(0, 2000)

# NOTE(review): per the scikit-learn SVC docs, random_state only matters when
# probability=True, so it likely has no effect here — confirm.
svm_classifier = SVC(random_state=42)
svm_classifier.fit(X_train[fit_subset], y_train[fit_subset])

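Check predictions on a few digits (note: these samples are part of the training subset, so this is a sanity check rather than an evaluation on unseen data)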

In [6]:
# Compare the true label with the SVC prediction for the first 8 samples.
# These samples lie inside the subset the classifier was fitted on.
for sample, digit in zip(X_train[:8], y_train[:8]):
    # predict() expects a 2-D input, so the single sample is wrapped in a list.
    print(f"digit = {digit}, predict = {svm_classifier.predict([sample])}")

digit = 5, predict = ['5']
digit = 0, predict = ['0']
digit = 4, predict = ['4']
digit = 1, predict = ['1']
digit = 9, predict = ['9']
digit = 2, predict = ['2']
digit = 1, predict = ['1']
digit = 3, predict = ['3']


`decision_function` returns one score per class (digit), aggregated from the underlying binary classifiers; the class with the highest score is the predicted digit

In [7]:
# Show the per-class decision scores of the SVC for the first 8 samples.
for sample, digit in zip(X_train[:8], y_train[:8]):
    scores = svm_classifier.decision_function([sample])
    # With a single sample, the argmax over the flattened score array is the
    # column index of the best-scoring class.
    class_id = scores.argmax()
    print(f"digit = {digit}, scores = {scores}, class_id (max) = {class_id}")

digit = 5, scores = [[ 3.793  0.729  6.062  8.298 -0.294  9.302  1.747  2.774  7.206  4.822]], class_id (max) = 5
digit = 0, scores = [[ 9.308 -0.294  6.138  7.215  0.714  8.277  3.796  1.765  2.766  5.112]], class_id (max) = 0
digit = 4, scores = [[ 1.757 -0.289  7.227  1.89   9.302  4.937  3.842  6.1    1.752  8.194]], class_id (max) = 4
digit = 1, scores = [[-0.293  9.305  7.269  4.828  3.809  2.755  0.732  7.192  7.277  1.725]], class_id (max) = 1
digit = 9, scores = [[-0.293  3.766  2.746  3.752  8.281  2.809  0.716  7.277  6.192  9.306]], class_id (max) = 9
digit = 2, scores = [[ 3.838 -0.291  9.302  5.099  1.764  6.13   0.74   2.806  7.205  8.249]], class_id (max) = 2
digit = 1, scores = [[-0.303  9.305  5.107  7.22   3.949  6.191  2.803  1.771  7.176  1.784]], class_id (max) = 1
digit = 3, scores = [[ 3.804 -0.29   7.237  9.309  2.731  6.246  0.707  1.72   8.288  5.103]], class_id (max) = 3


### One-versus-Rest (OvR) Classifier

To force a one-versus-one (OvO) or one-versus-rest (OvR) strategy with any classifier, wrap it in the `OneVsOneClassifier` or `OneVsRestClassifier` class from `sklearn.multiclass`.

In [9]:
from sklearn.multiclass import OneVsRestClassifier

# Wrap an SVC in OneVsRestClassifier and fit it on the same 2,000-sample
# training subset used for the plain SVC.
ovr_fit_subset = slice(0, 2000)
base_svc = SVC(random_state=42)
ovr_svm_clf = OneVsRestClassifier(base_svc)
ovr_svm_clf.fit(X_train[ovr_fit_subset], y_train[ovr_fit_subset])

In [10]:
# Compare the true label with the OvR prediction for the first 8 samples.
# These samples lie inside the subset the classifier was fitted on.
for sample, digit in zip(X_train[:8], y_train[:8]):
    # predict() expects a 2-D input, so the single sample is wrapped in a list.
    print(f"digit = {digit}, predict = {ovr_svm_clf.predict([sample])}")

digit = 5, predict = ['5']
digit = 0, predict = ['0']
digit = 4, predict = ['4']
digit = 1, predict = ['1']
digit = 9, predict = ['9']
digit = 2, predict = ['2']
digit = 1, predict = ['1']
digit = 3, predict = ['3']


In [11]:
# Show the per-class decision scores of the OvR classifier for the first 8
# samples.
for sample, digit in zip(X_train[:8], y_train[:8]):
    scores = ovr_svm_clf.decision_function([sample])
    # With a single sample, the argmax over the flattened score array is the
    # column index of the best-scoring class.
    class_id = scores.argmax()
    print(f"digit = {digit}, scores = {scores}, class_id (max) = {class_id}")

digit = 5, scores = [[-1.328 -1.791 -1.393 -0.944 -2.29   0.809 -1.723 -1.21  -1.637 -1.792]], class_id (max) = 5
digit = 0, scores = [[ 1.306 -1.887 -1.45  -1.58  -2.138 -1.58  -1.714 -1.433 -1.554 -1.328]], class_id (max) = 0
digit = 4, scores = [[-1.467 -1.511 -1.    -1.186  1.    -1.434 -1.364 -1.142 -1.626 -1.781]], class_id (max) = 4
digit = 1, scores = [[-1.844  1.238 -1.199 -1.326 -1.158 -1.476 -1.82  -1.259 -1.248 -2.25 ]], class_id (max) = 1
digit = 9, scores = [[-1.847 -1.491 -1.745 -1.643 -1.    -1.479 -1.814 -1.195 -1.406  1.   ]], class_id (max) = 9
digit = 2, scores = [[-1.227 -2.217  0.788 -1.288 -2.236 -1.29  -1.711 -1.377 -1.377 -1.   ]], class_id (max) = 2
digit = 1, scores = [[-2.481  1.005 -1.038 -1.011 -1.526 -1.362 -1.636 -1.5   -1.286 -1.291]], class_id (max) = 1
digit = 3, scores = [[-1.273 -2.197 -1.091  1.374 -1.74  -1.291 -2.328 -2.068 -1.505 -1.399]], class_id (max) = 3
