# Exercise 2 - One-vs-all MNIST
## Imports

In [89]:
import numpy as np

# So that changes to the a2 model are reflected here.
import a3
import importlib
importlib.reload(a3)
import a3
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsOneClassifier
from sklearn.model_selection import cross_val_score
from matplotlib import pyplot as plt
import pickle

## Functions

In [90]:
def predict(clfs, test):
    preds = []
    for i in range(len(clfs)):
        print(clfs[i].predict(test))
        if clfs[i].predict(test) == 1:
            preds.append(i)

def save_pickle(file_path, data):
    file = open(file_path, 'ab')
    pickle.dump(data, file)
    file.close()

def load_pickle(file_path):
    file = open(file_path, 'rb')
    data = pickle.load(file)
    file.close()
    return data

## Part 1 - Load & trim MNIST dataset

In [91]:
X_train, y_train, X_test, y_test = a3.mnist()

# Trim data.
train_size = 2000
test_size = 1000
X_train, y_train = X_train[:train_size, :], y_train[:train_size, :]
X_test, y_test = X_test[:test_size, :], y_test[:test_size, :]

## Part 2 - Train or load one-vs-all SVMs

In [94]:
# True to load from disk, False to train models.
load_grids = True

if load_grids:
    grids = load_pickle('pickles/grids.pickle')
else:
    params = {
        'C' : [0.1, 1, 10, 100],
        'gamma' : [1, 0.1, 0.01, 'scale', 'auto'],
    }

    # Perform grid search.
    grids = []
    for col in range(y_train.shape[1]):
        print("Training model for class", col)
        grid = GridSearchCV(SVC(), params, n_jobs=-1)
        grid.fit(X_train, y_train[:, col])
        grids.append(grid)

    save_pickle('pickles/grids.pickle', grids)

# Print individual classifier scores.
print("Cross validation accuracy for each class:")
for i in range(len(grids)):
    score = round(grids[i].best_score_, 3)
    print(f"{i}: {score}%")

# Fit classifiers into OneVsAll classifier.
clfs = [grid.best_estimator_ for grid in grids]
ova_clf = a3.OneVsAllClassifier(clfs)

# Calculate error & accuracy
y_pred_ova = ova_clf.predict(X_train)
y_diff = y_train - y_pred_ova

pred_count = int(y_train.shape[0])
error = int(np.sum(y_diff**2))
print(f"{error} errors out of {pred_count} predictions")

accuracy = pred_count - error / pred_count
print(f"OneVsAll training accuracy = {accuracy}%")

Cross validation accuracy for each class:
0: 0.991%
1: 0.992%
2: 0.977%
3: 0.983%
4: 0.979%
5: 0.979%
6: 0.985%
7: 0.975%
8: 0.971%
9: 0.971%
194 errors out of 2000 predictions
OneVsAll training accuracy = 1999.903%


## Train sklearn one-vs-one SVC

In [93]:
# Reformat y from 2d to 1d to work with sklearn one-vs-one classifier.
y_train_ovo = np.zeros(y_train.shape[0])
for i in range(y_train.shape[0]):
    y_train_ovo[i] = np.where(y_train[i] == 1)[0][0]

# Train model.
ovo_clf = OneVsOneClassifier(SVC(C=10, gamma=0.01)).fit(X_train, y_train_ovo)

# Cross_validate.
score = round(np.mean(cross_val_score(ovo_clf, X_train, y_train_ovo, n_jobs=-1)), 3)
print(f"One-vs-one SVC parameters = ")
print(f"Score = {score}%")


One-vs-one SVC parameters = 
Score = 0.922%
