# Exercise 2 - One-vs-all MNIST
## Imports

In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsOneClassifier
from sklearn.model_selection import cross_val_score
from matplotlib import pyplot as plt
import pickle
import os
print("Current working directory (Make sure it's the root, \"Assignment 3\"):", os.getcwd(), sep="\n")

# Reload a3 so that edits to the a3 module are picked up without restarting the kernel.
import a3
import importlib
importlib.reload(a3)
import a3


Current working directory (Make sure it's the root, "Assignment 3"):
/Users/atakancoban/Desktop/School/2dv506 - Machine learning/Assignment 3/Lecture_8


## Functions

In [2]:
def predict(clfs, test):
    """Return the indices of the binary classifiers that predict 1 for ``test``.

    Args:
        clfs: Sequence of fitted binary classifiers. The position of a
            classifier in the sequence is used as its class index.
        test: Input passed unchanged to each classifier's ``predict``.
            The ``== 1`` result is used as a plain truth value, so this is
            only well defined when ``predict`` yields a single label
            (e.g. exactly one sample).

    Returns:
        list[int]: Every index ``i`` for which ``clfs[i].predict(test) == 1``,
        in ascending order. Empty when no classifier predicts 1.
    """
    preds = []
    for i, clf in enumerate(clfs):
        # Query each classifier exactly once; no debug printing.
        if clf.predict(test) == 1:
            preds.append(i)
    # Hand the collected indices back to the caller.
    return preds

def save_pickle(file_path, data):
    """Pickle ``data`` to ``file_path``, replacing any previous contents.

    The file is opened in ``'wb'`` mode so that it always holds exactly one
    pickled object. Appending (``'ab'``) would stack a new pickle behind the
    old one, and a reader that performs a single ``pickle.load`` (as
    ``load_pickle`` does) would keep returning the stale first object.

    Args:
        file_path: Destination path; its parent directory must already exist.
        data: Any picklable object.
    """
    # The context manager closes the handle even if pickling raises.
    with open(file_path, 'wb') as file:
        pickle.dump(data, file)

def load_pickle(file_path):
    """Load and return the first pickled object stored in ``file_path``.

    Args:
        file_path: Path to a file containing pickled data.

    Returns:
        The object produced by a single ``pickle.load`` on the file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you created
    # yourself or otherwise trust.
    # The context manager closes the handle even if unpickling raises.
    with open(file_path, 'rb') as file:
        return pickle.load(file)

## Part 1 - Load & trim MNIST dataset

In [3]:
# Fetch the four MNIST arrays from the a3 helper module.
X_train, y_train, X_test, y_test = a3.mnist()

# Number of leading rows to keep from each split.
train_size = 5000
test_size = 1000

# Keep only the first `train_size` / `test_size` rows; all columns are retained.
X_train = X_train[:train_size, :]
y_train = y_train[:train_size, :]
X_test = X_test[:test_size, :]
y_test = y_test[:test_size, :]

## Part 2 - Train or load SVMs for each class

In [4]:
# Switch: True reads previously fitted grid searches from disk,
# False runs the grid searches again and stores the result.
load_grids = True

if not load_grids:
    # Hyper-parameter grid explored for every per-class SVC.
    params = {
        'C' : [0.1, 1, 10, 100],
        'gamma' : [1, 0.1, 0.01, 'scale', 'auto'],
    }

    # One grid search per column of y_train (one binary problem per column).
    grids = []
    n_classes = y_train.shape[1]
    for col in range(n_classes):
        print("Training model for class", col)
        grid = GridSearchCV(SVC(), params, n_jobs=-1)
        grid.fit(X_train, y_train[:, col])
        grids.append(grid)

    save_pickle('pickles/grids.pickle', grids)
else:
    grids = load_pickle('pickles/grids.pickle')

# Report each grid search's best cross-validation score as a percentage.
print("Cross validation accuracy for each class:")
for i, fitted_grid in enumerate(grids):
    score = round(fitted_grid.best_score_ * 100, 3)
    print(f"{i}: {score}%")
print()

Cross validation accuracy for each class:
0: 99.1%
1: 99.2%
2: 97.7%
3: 98.3%
4: 97.9%
5: 97.9%
6: 98.5%
7: 97.5%
8: 97.1%
9: 97.1%



## Part 3 - Fit classifiers into OneVsAll Classifier
### Note
If the one-vs-all classifier predicts several digits for a sample and the true digit is among them, the
prediction is counted as correct.

In [5]:
# Collect the best estimator of every grid search, hand them to the a3
# one-vs-all classifier, and predict the training samples.
clfs = [search.best_estimator_ for search in grids]
ova_clf = a3.OneVsAllClassifier(clfs)
y_pred_ova = ova_clf.predict(X_train)

# Count errors: entries where (true - predicted) equals 1.
# Presumably both arrays are 0/1 matrices of equal shape, so a 1 marks a true
# class that was not predicted -- confirm against a3.OneVsAllClassifier.
print("Calculating training errors for OneVsAll...")
pred_count = int(y_train.shape[0])
y_diff_ova = y_train - y_pred_ova
error_ova = np.count_nonzero(y_diff_ova == 1)
print(f"{error_ova} errors out of {pred_count} predictions")

# Accuracy is the share of samples without such an error.
accuracy_ova = (pred_count - error_ova) / pred_count
print(f"Accuracy = {accuracy_ova*100}%")

Calculating training errors for OneVsAll...
557 errors out of 5000 predictions
Accuracy = 88.86%


## Part 4 - Train sklearn one-vs-one SVC

In [7]:
# Switch: True reads stored predictions from disk, False predicts and stores them.
load_predictions = True

# sklearn's one-vs-one wrapper takes 1-D labels: for every row of y_train use
# the index of its first entry equal to 1, stored as float.
y_train_ovo = np.array(
    [np.where(row == 1)[0][0] for row in y_train],
    dtype=float,
)

# Fit the one-vs-one classifier. Note that the fit runs regardless of
# load_predictions; only the prediction step is skipped when loading.
base_svc = SVC(C=10, gamma=0.01)
ovo_clf = OneVsOneClassifier(base_svc).fit(X_train, y_train_ovo)
if not load_predictions:
    y_pred_ovo = ovo_clf.predict(X_train)
    save_pickle("pickles/y_preds_ovo.pickle", y_pred_ovo)
else:
    y_pred_ovo = load_pickle("pickles/y_preds_ovo.pickle")

# Error count: 1 where the label differs from the prediction, 0 otherwise.
# pred_count is the sample count computed in the one-vs-all cell.
label_mismatch = y_train_ovo != y_pred_ovo
y_diff_ovo = label_mismatch.astype(int)
error_ovo = y_diff_ovo.sum()
print(f"{error_ovo} errors out of {pred_count} predictions")

# Accuracy is the share of samples predicted correctly.
accuracy_ovo = (pred_count - error_ovo) / pred_count
print(f"Accuracy = {accuracy_ovo*100}%")

1 errors out of 5000 predictions
Accuracy = 99.98%
