<a href="https://colab.research.google.com/github/jackiekuen2/notes-handson-ml-tf/blob/master/ch5_ExerciseQ9_SVMMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- SVM Classifier on MNIST dataset
    - Linear SVM
    - Linear SVM, with scaled data
    - RBF SVM, with scaled data
- Hyperparameter Tuning

In [0]:
from sklearn.datasets import fetch_openml

# Download the MNIST dataset from OpenML (cached locally after the first call).
# as_frame=False forces NumPy arrays: newer scikit-learn releases return a
# pandas DataFrame by default, which breaks the positional fancy indexing
# (X_train[rnd_idx]) used when the training set is shuffled later on.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

In [0]:
# Pull features and labels out of the dataset, then split at index 60000:
# the first 60k samples become the training set, the remainder the test set.
X, y = mnist['data'], mnist['target']

X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [0]:
# Shuffle the training set
import numpy as np

np.random.seed(42)  # fixed seed so the shuffle order is reproducible
# Size the permutation from the data itself rather than hard-coding 60000,
# so the cell keeps working if the training split size changes.
rnd_idx = np.random.permutation(len(X_train))
# Apply the same index order to features and labels so each pair stays aligned.
# NOTE: X_train / y_train are rebound here, so re-running this cell shuffles
# the already-shuffled arrays again (features and labels remain aligned).
X_train = X_train[rnd_idx]
y_train = y_train[rnd_idx]

# 1. Linear SVM classifier, use one-versus-all

In [0]:
# 1. Linear SVM classifier (LinearSVC uses one-versus-all by default)
from sklearn.svm import LinearSVC

# Baseline: train on the first 10,000 shuffled samples of the unscaled data
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X=X_train[:10000], y=y_train[:10000])



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
          verbose=0)

In [0]:
from sklearn.metrics import accuracy_score

# Score the baseline model on the held-out test set
y_pred = lin_clf.predict(X=X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8459

Validation accuracy: 84.6%

# 2. Linear SVM classifier, with scaled data

In [0]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Fit the scaler on the training set only, then reuse those statistics on the
# test set (transform only) so no test-set information enters the scaling.
X_train_scaled = scaler.fit_transform(np.asarray(X_train, dtype=np.float32))
X_test_scaled = scaler.transform(np.asarray(X_test, dtype=np.float32))

In [0]:
# Same LinearSVC configuration, now trained on the first 10,000 standardized samples
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X=X_train_scaled[:10000], y=y_train[:10000])



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
          verbose=0)

In [0]:
# Evaluate the LinearSVC trained on scaled data against the scaled test set
y_pred = lin_clf.predict(X=X_test_scaled)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8682

Validation accuracy: 86.8%

# 3. SVC, use RBF and one-versus-all, with scaled data 

In [0]:
from sklearn.svm import SVC

# Kernel SVM (SVC's default kernel is RBF) with a one-versus-rest decision
# function, trained on the first 10,000 scaled samples
svm_clf = SVC(gamma='auto', decision_function_shape='ovr')
svm_clf.fit(X=X_train_scaled[:10000], y=y_train[:10000])

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [0]:
# Evaluate the RBF SVC against the scaled test set
y_pred = svm_clf.predict(X=X_test_scaled)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9425

Validation accuracy: 94.3%

## Hyperparameter Tuning with Randomized Search

In [6]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform
from sklearn.svm import SVC

# Base estimator whose gamma and C are tuned by the search
svm_clf = SVC(decision_function_shape='ovr')

# Search space:
#   gamma - log-uniform (reciprocal) between 0.001 and 0.1
#   C     - uniform(loc=1, scale=10), i.e. sampled from [1, 11]
param_distributions = dict(
    gamma=reciprocal(0.001, 0.1),
    C=uniform(1, 10),
)

# 10 sampled candidates x 3-fold CV, run on all available cores
random_search_cv = RandomizedSearchCV(
    estimator=svm_clf,
    param_distributions=param_distributions,
    n_iter=10,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
# Only the first 1,000 scaled samples are used, to keep the search fast
random_search_cv.fit(X=X_train_scaled[:1000], y=y_train[:1000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:   20.6s finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                 class_weight=None, coef0=0.0,
                                 decision_function_shape='ovr', degree=3,
                                 gamma='scale', kernel='rbf', max_iter=-1,
                                 probability=False, random_state=None,
                                 shrinking=True, tol=0.001, verbose=False),
                   iid='deprecated', n_iter=10, n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fbb615b3940>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fbb61b16358>},
                   pre_dispatch='2*n_jobs', random_state=42, refit=True,
                   return_train_score=False, scoring=None, verbose=2)

In [7]:
# Show the estimator that the search refit with the best-scoring parameter combination
random_search_cv.best_estimator_

SVC(C=2.560186404424365, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3,
    gamma=0.002051110418843397, kernel='rbf', max_iter=-1, probability=False,
    random_state=None, shrinking=True, tol=0.001, verbose=False)

In [8]:
# Hyperparameter values (C and gamma) of the best-scoring candidate
random_search_cv.best_params_

{'C': 2.560186404424365, 'gamma': 0.002051110418843397}

In [9]:
# Cross-validation score obtained by the best candidate during the search
random_search_cv.best_score_

0.8560057063051075

In [10]:
# The randomized search only used the first 1,000 training samples, so retrain
# the best estimator (keeping its tuned C and gamma) on the entire scaled training set.
random_search_cv.best_estimator_.fit(X_train_scaled, y_train)

SVC(C=2.560186404424365, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3,
    gamma=0.002051110418843397, kernel='rbf', max_iter=-1, probability=False,
    random_state=None, shrinking=True, tol=0.001, verbose=False)

In [12]:
from sklearn.metrics import accuracy_score

# Final evaluation: tuned SVC (refit on the full training set) against the scaled test set
y_pred = random_search_cv.best_estimator_.predict(X=X_test_scaled)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9708