# Exercise 2 - One-vs-all MNIST
## Imports

In [7]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsOneClassifier
from sklearn.model_selection import cross_val_score
from matplotlib import pyplot as plt
import pickle
import os
# Show the working directory: the relative 'pickles/...' paths used later in
# this notebook are resolved against it.
print("Current working directory:", os.getcwd(), sep="\n")

# Reload a3 so that edits to the a3 module are picked up here.
import a3
import importlib
importlib.reload(a3)
import a3


Current working directory:
/Users/atakancoban/Desktop/School/2dv506 - Machine learning/Assignment 3/Lecture_8


## Functions

In [8]:
def predict(clfs, test):
    """Return the indices of the classifiers that predict 1 for ``test``.

    Parameters
    ----------
    clfs : sequence
        Objects exposing a ``predict(test)`` method, one per class.
    test : object
        Input forwarded unchanged to every classifier. The comparison below
        needs a result with a single truth value, so this presumably is a
        single sample (array of shape ``(1, n_features)``) -- TODO confirm.

    Returns
    -------
    list of int
        Positions ``i`` for which ``clfs[i].predict(test) == 1``; empty when
        no classifier fires or ``clfs`` is empty.
    """
    preds = []
    for i, clf in enumerate(clfs):
        # Evaluate each classifier exactly once (the call can be expensive).
        if clf.predict(test) == 1:
            preds.append(i)
    # The collected indices were previously built but never returned.
    return preds

def save_pickle(file_path, data):
    """Pickle ``data`` to ``file_path``, replacing any previous content.

    The file is opened in write mode ('wb') rather than append mode: with
    append mode every call adds another pickle to the end of the file, while
    a single ``pickle.load`` only ever reads the first (oldest) one, so
    freshly saved data would never be read back.

    Parameters
    ----------
    file_path : str
        Destination path. Missing parent directories are created.
    data : object
        Any picklable object.
    """
    parent = os.path.dirname(file_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # The context manager closes the file even if pickling raises.
    with open(file_path, 'wb') as file:
        pickle.dump(data, file)

def load_pickle(file_path):
    """Load and return the first pickled object stored in ``file_path``.

    Parameters
    ----------
    file_path : str
        Path of the pickle file to read.

    Returns
    -------
    object
        The unpickled object.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that this notebook (or another trusted source) produced.
    # The context manager closes the file even if unpickling raises.
    with open(file_path, 'rb') as file:
        return pickle.load(file)

## Part 1 - Load & trim MNIST dataset

In [9]:
# Fetch the four MNIST splits from the a3 helper module.
X_train, y_train, X_test, y_test = a3.mnist()

# Optionally keep only the leading rows of every split.
trim = True
if trim:
    train_size = 10000
    test_size = 10000
    X_train = X_train[:train_size, :]
    y_train = y_train[:train_size, :]
    X_test = X_test[:test_size, :]
    y_test = y_test[:test_size, :]

## Part 2 - Train or load SVMs for each class

In [10]:
# Author's note: loading makes results fail on my laptop atm.
# load_clfs = True reads the classifiers from disk, False trains them.
load_clfs = True

# SVC hyperparameters shared by every per-class model.
C = 10
gamma = 0.01

if not load_clfs:
    # Fit one SVC per column of y_train, using that column as the target.
    clfs = []
    for col in range(y_train.shape[1]):
        print("Training model for class", col)
        clf = SVC(C=C, gamma=gamma)
        clf.fit(X_train, y_train[:, col])
        clfs.append(clf)
    print()

    # Cache the trained models for later runs.
    save_pickle('pickles/clfs.pickle', clfs)
else:
    clfs = load_pickle('pickles/clfs.pickle')

## Part 3 - Fit classifiers into OneVsAll Classifier
### Note
If the one-vs-all classifier predicts several digits for a sample and one of them is the true digit, the
prediction is counted as correct.

In [11]:
# Author's note: loading makes results fail on my laptop atm.
# True reads cached predictions from disk, False computes them.
load_y_pred_ova = False

# Wrap the per-class classifiers in the OneVsAll classifier from a3.
ova_clf = a3.OneVsAllClassifier(clfs)

print("Calculating test errors for OneVsAll...")
if not load_y_pred_ova:
    # Predict the test samples and cache the result.
    y_pred_ova = ova_clf.predict(X_test)
    save_pickle("pickles/y_pred_ova.pickle", y_pred_ova)
else:
    y_pred_ova = load_pickle("pickles/y_pred_ova.pickle")

# Count the entries where y_test exceeds the prediction by exactly 1.
# Presumably both arrays hold 0/1 indicators per class, so such an entry means
# the true digit was not predicted -- TODO confirm against a3.
y_diff_ova = y_test - y_pred_ova
pred_count = len(y_test)
error_ova = np.count_nonzero(y_diff_ova == 1)
print(f"{error_ova} errors out of {pred_count} predictions")

# Share of test samples without such an error, rounded to three decimals.
accuracy_ova = round((pred_count - error_ova) / pred_count, 3)
print(f"Accuracy = {accuracy_ova*100}%")

Calculating test errors for OneVsAll...
709 errors out of 10000 predictions
Accuracy = 92.9%


## Part 4 - Train sklearn one-vs-one SVC

In [12]:
# True reads cached predictions from disk, False computes them.
# The classifier itself is fitted in both cases.
load_y_pred_ovo = False

# Collapse each row of the 2d y to the index of its first entry equal to 1,
# giving the 1d label vector used with sklearn's one-vs-one classifier.
y_train_ovo = np.array(
    [np.where(row == 1)[0][0] for row in y_train], dtype=float
)

print("Training OneVsOne classifier")
ovo_clf = OneVsOneClassifier(SVC(C=C, gamma=gamma))
ovo_clf.fit(X_train, y_train_ovo)
print("Done...\n")

print("Calculating test errors for OneVsOne...")

# Same 2d -> 1d conversion for the test labels.
y_test_ovo = np.array(
    [np.where(row == 1)[0][0] for row in y_test], dtype=float
)

if not load_y_pred_ovo:
    # Predict the test samples and cache the result.
    y_pred_ovo = ovo_clf.predict(X_test)
    save_pickle("pickles/y_pred_ovo.pickle", y_pred_ovo)
else:
    y_pred_ovo = load_pickle("pickles/y_pred_ovo.pickle")

# One error per test sample whose predicted label differs from the true one.
y_diff_ovo = np.not_equal(y_test_ovo, y_pred_ovo).astype(int)
error_ovo = np.sum(y_diff_ovo)
print(f"{error_ovo} errors out of {pred_count} predictions")

# Share of correctly labelled test samples, rounded to three decimals.
accuracy_ovo = round((pred_count - error_ovo) / pred_count, 3)
print(f"Accuracy = {accuracy_ovo*100}%")

Training OneVsOne classifier
Done...

Calculating test errors for OneVsOne...
349 errors out of 10000 predictions
Accuracy = 96.5%


## Part 5 - Evaluation of multi-class classification choices

In [13]:
# TODO: Create a confusion matrix for each model and evaluate both (why are they so accurate? Maybe try a larger dataset size).