# MNIST Data: SVM Classifier (RBF Kernel)

## Initialize

In [None]:
# Common imports
import numpy as np
import os

# Seed NumPy's global random generator so this notebook's output is stable
# across runs. The SciPy distributions and RandomizedSearchCV further down are
# built without an explicit random_state, so they presumably draw from this
# global generator — keep this call ahead of them.
np.random.seed(2042)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Where to save the figures: an "images" folder under the project root
# (the current working directory). exist_ok=True makes re-running this cell
# safe when the folder is already there.
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")
os.makedirs(IMAGES_PATH, exist_ok=True)


## Fetch Data

In [None]:
# Fetch the MNIST digits dataset from OpenML

from sklearn.datasets import fetch_openml
# as_frame=False pins "data"/"target" to NumPy arrays regardless of the
# installed scikit-learn release; newer releases default to as_frame="auto",
# which returns pandas objects for this dataset.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

In [None]:
# Features and labels; the labels are cast to unsigned 8-bit integers.
X, y = mnist["data"], mnist["target"].astype(np.uint8)

# Fixed positional split: the first 60,000 rows train, the remainder test.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]



In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so test-set statistics never influence the scaling.
scaler = StandardScaler()
scaler.fit(X_train.astype(np.float32))
X_train_scaled = scaler.transform(X_train.astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

## Train Model

In [None]:
from sklearn.svm import SVC

# Baseline model. No kernel is passed, so SVC uses its default kernel (the
# repr printed below reports kernel='rbf'); every other hyperparameter except
# gamma="scale" is left at its default as well.
svm_clf = SVC(gamma="scale")
# Fit on the first 1,000 scaled training samples only (presumably to keep
# training time short). The call is the cell's last expression, so the fitted
# estimator's repr is displayed.
svm_clf.fit(X_train_scaled[:1000], y_train[:1000])

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

## Measure Performance

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

def model_scores(y_true, y_pred, y_labels):
    """Build a plain-text evaluation summary for a multi-class classifier.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.
        y_labels: Label values that fix the row/column order of the confusion
            matrix. Only the confusion matrix receives them; every other
            metric is called without an explicit label list.

    Returns:
        str: Accuracy, macro-averaged precision / recall / F1, the confusion
        matrix and the per-class classification report, ready to ``print``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # Macro averaging: every class contributes equally, whatever its support.
    precision = precision_score(y_true, y_pred, average="macro")
    recall = recall_score(y_true, y_pred, average="macro")
    f1 = f1_score(y_true, y_pred, average="macro")
    confusion = confusion_matrix(y_true, y_pred, labels=y_labels)
    report = classification_report(y_true, y_pred)

    # One line per scalar metric; "Recall" now carries the same trailing colon
    # as the other labels (the original template omitted it).
    template = (
        "Accuracy: {}\n"
        "Precision: {}\n"
        "Recall: {}\n"
        "F1-Score: {}\n\n"
        "Confusion Matrix: \n{} \n\n"
        "Classification Report: \n{}"
    )
    return template.format(accuracy, precision, recall, f1, confusion, report)


In [None]:
# Quick check of the baseline model: predict and score only the first 100
# scaled test samples, not the whole test set.
y_pred = svm_clf.predict(X_test_scaled[:100])
# svm_clf.classes_ is forwarded as the label order for the confusion matrix.
print(model_scores(y_test[:100], y_pred, svm_clf.classes_))

Accuracy: 0.9
Precision: 0.916825396825397
Recall 0.9001839826839827
F1-Score: 0.9029594993692356

Confusion Matrix: 
[[ 8  0  0  0  0  0  0  0  0  0]
 [ 0 14  0  0  0  0  0  0  0  0]
 [ 0  0  7  0  0  0  0  1  0  0]
 [ 0  0  1  9  0  1  0  0  0  0]
 [ 0  0  0  0 14  0  0  0  0  0]
 [ 0  0  1  0  0  6  0  0  0  0]
 [ 0  0  1  0  1  0  7  1  0  0]
 [ 0  0  0  0  0  0  0 14  0  1]
 [ 0  0  0  0  0  0  0  0  2  0]
 [ 0  0  0  0  0  0  0  2  0  9]] 

Classification Report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        14
           2       0.70      0.88      0.78         8
           3       1.00      0.82      0.90        11
           4       0.93      1.00      0.97        14
           5       0.86      0.86      0.86         7
           6       1.00      0.70      0.82        10
           7       0.78      0.93      0.85        15
           8       1.00      1.00     

## Optimize Model Parameters

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Search space: gamma is drawn log-uniformly from [0.001, 0.1]; C is drawn
# uniformly from [1, 11], because scipy's uniform takes (loc, scale) rather
# than (low, high).
param_distributions = dict(
    gamma=reciprocal(0.001, 0.1),
    C=uniform(loc=1, scale=10),
)

# 10 sampled candidates x 3 folds, searched on the first 1,000 training samples.
rnd_search_cv = RandomizedSearchCV(
    estimator=svm_clf,
    param_distributions=param_distributions,
    n_iter=10,
    cv=3,
    verbose=2,
)
rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=10.907619568894967, gamma=0.0062438299522977565 ...............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  C=10.907619568894967, gamma=0.0062438299522977565, total=   1.5s
[CV] C=10.907619568894967, gamma=0.0062438299522977565 ...............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s


[CV]  C=10.907619568894967, gamma=0.0062438299522977565, total=   1.3s
[CV] C=10.907619568894967, gamma=0.0062438299522977565 ...............
[CV]  C=10.907619568894967, gamma=0.0062438299522977565, total=   1.3s
[CV] C=6.75011734742257, gamma=0.008162191423759744 ..................
[CV] ... C=6.75011734742257, gamma=0.008162191423759744, total=   1.3s
[CV] C=6.75011734742257, gamma=0.008162191423759744 ..................
[CV] ... C=6.75011734742257, gamma=0.008162191423759744, total=   1.3s
[CV] C=6.75011734742257, gamma=0.008162191423759744 ..................
[CV] ... C=6.75011734742257, gamma=0.008162191423759744, total=   1.3s
[CV] C=5.912834555049468, gamma=0.001196881115803255 .................
[CV] .. C=5.912834555049468, gamma=0.001196881115803255, total=   0.9s
[CV] C=5.912834555049468, gamma=0.001196881115803255 .................
[CV] .. C=5.912834555049468, gamma=0.001196881115803255, total=   1.0s
[CV] C=5.912834555049468, gamma=0.001196881115803255 .................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:   37.8s finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                 class_weight=None, coef0=0.0,
                                 decision_function_shape='ovr', degree=3,
                                 gamma='scale', kernel='rbf', max_iter=-1,
                                 probability=False, random_state=None,
                                 shrinking=True, tol=0.001, verbose=False),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fcd1cb51a58>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fcd1cb51d30>},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring=None, verbose=2)

In [None]:
# Display the estimator built from the best sampled (C, gamma) pair; the
# search repr above reports refit=True, so this model has been refit.
rnd_search_cv.best_estimator_

SVC(C=8.816264095523811, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3,
    gamma=0.0012815338682938325, kernel='rbf', max_iter=-1, probability=False,
    random_state=None, shrinking=True, tol=0.001, verbose=False)

In [None]:
# Mean cross-validated score of the best candidate. The search repr above
# reports scoring=None, so the estimator's own score method is used.
rnd_search_cv.best_score_

0.8619937302572033

In [None]:
# Score the tuned model on the full scaled test set. The label order for the
# confusion matrix is taken from the same estimator that made the predictions,
# not from the separately fitted baseline model.
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
print(model_scores(y_test, y_pred, rnd_search_cv.best_estimator_.classes_))

Accuracy: 0.8827
Precision: 0.8847037028139608
Recall 0.8809936688922431
F1-Score: 0.8815708323383802

Confusion Matrix: 
[[ 941    0    9    3    2    5    7    9    3    1]
 [   0 1108    1    2    0    3    4    2   15    0]
 [   8    6  923   13    6    2   13   40   20    1]
 [   1    3   37  806    1   73    3   30   46   10]
 [   0    3    9    2  881    1    9   18    6   53]
 [   7    4    7   31   12  754   14   40   14    9]
 [  13    3   20    1   11   19  837   51    3    0]
 [   0   16   19    6   12    1    0  935    0   39]
 [  18    4   17   21   13   43    8   41  789   20]
 [   7    6    5   10   48    5    0   70    5  853]] 

Classification Report: 
              precision    recall  f1-score   support

           0       0.95      0.96      0.95       980
           1       0.96      0.98      0.97      1135
           2       0.88      0.89      0.89      1032
           3       0.90      0.80      0.85      1010
           4       0.89      0.90      0.90       