In [1]:
import pandas as pd
from sklearn.svm import SVC
import numpy as np
from tensorflow import keras
import time
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

### Dataset normalization

In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train, y_train = x_train[:30000], y_train[:30000]
x_valid, y_valid = x_test[:3000], y_test[:3000]
x_test, y_test = x_test[3000:], y_test[3000:]

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_valid = x_valid.astype('float32')

x_train = x_train / 255.0
x_test = x_test / 255.0
x_valid = x_valid / 255.0


In [3]:
x_train_flat = np.array([x.ravel() for x in x_train])
x_test_flat = np.array([x.ravel() for x in x_test])

In [4]:
start_t = time.time()
clf = SVC(gamma='auto')
clf.fit(x_train_flat, y_train)
print('Training time: ', time.time()-start_t, 'seconds')

Training time:  100.34077525138855 seconds


In [4]:
start_t = time.time()
clf = LinearSVC()
clf.fit(x_train_flat, y_train)
print('Training time: ', time.time()-start_t, 'seconds')

Training time:  0.8498804569244385 seconds




In [13]:
start_t = time.time()
clf = OneVsRestClassifier(LinearSVC(), n_jobs=-1)
clf.fit(x_train_flat, y_train)
print('Training time: ', time.time()-start_t, 'seconds')

Training time:  12.749311447143555 seconds


In [17]:
start_t = time.time()
clf = OneVsRestClassifier(SVC(gamma='auto'), n_jobs=-1)
clf.fit(x_train_flat, y_train)
print('Training time: ', time.time()-start_t, 'seconds')

Training time:  207.38757920265198 seconds


In [5]:
def compute_accuracy(y_true, y_pred):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return np.mean(y_pred == y_true)

results_predict = clf.predict(x_test_flat)
acc = compute_accuracy(y_test, results_predict)
acc

0.9475714285714286

In [10]:
set(y_test)

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

In [13]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, results_predict, labels=list(set(y_test)))

In [25]:
def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14):
    df_cm = pd.DataFrame(
        confusion_matrix, index=class_names, columns=class_names, 
    )
    fig = plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt="d")
    except ValueError:
        raise ValueError("Confusion matrix values must be integers.")
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    return fig

print_confusion_matrix(cm, [str(x) for x in set(y_test)]).savefig('confusion_matrix_svc.png')

In [21]:
import seaborn as sns
from matplotlib import pyplot as plt

group_names = [str(x) for x in set(y_test)]X_train shapeX_train shape
cf_matrix = cm
group_counts = ["{0:0.0f}".format(value) for value in
                cf_matrix.flatten()]
group_percentages = ["{0:.2%}".format(value) for value in
                     cf_matrix.flatten()/np.sum(cf_matrix)]
labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in
          zip(group_names,group_counts,group_percentages)]
labels = np.asarray(labels).reshape(2,2)
sns.heatmap(cf_matrix, annot=labels, fmt='', cmap='Blues')

ValueError: cannot reshape array of size 10 into shape (2,2)

#### SVC - RBF - ACC: 0.9475714285714286
#### SVC - Linear - ACC: 
#### LinearSVC - ACC: 0.8784285714285714
#### OneVsRestClassifier - SVC linear - ACC: 0.8804285714285714

#### 30.000 samples
#### OneVsRestClassifier - LinearSVC - ACC: 0.9215714285714286 Training time:  12.749311447143555 seconds
#### SVC - RBF - ACC: 0.9475714285714286 Training time:  98.5876259803772 seconds
#### OneVsRestClassifier - SVC RBF - ACC: 0.9304285714285714 Training time:  207.38757920265198 seconds
