## Imports

In [1]:
import numpy as np
import util
import LinearRegression

## Data Loading and Processing

In [5]:
X_train, y_train = util.load_data('data/mnist.mat')
X_test, y_test = util.load_data('data/mnist.mat', train = False)
X_train = util.normalize(X_train)
X_test = util.normalize(X_test)
X_train = util.append_bias(X_train)
X_test = util.append_bias(X_test)

## Binary Classification

In [6]:
X_train_binary, y_train_binary = util.generate_binary_dataset(X_train, y_train, 0, 1)
X_test_binary, y_test_binary = util.generate_binary_dataset(X_test, y_test, 0, 1)
lr = LinearRegression.LinearRegression('binary')
lr.fit(X_train_binary, y_train_binary)
y_pred_binary = lr.predict(X_test_binary)
acc = lr.accuracy_score(y_test_binary, y_pred_binary)
print('Accuracy: {}'.format(acc))


Accuracy: 0.5361702127659574


## One vs. Rest Multi-Class Classification

In [8]:
num_classes = 10
y_train_onehot = util.onehot_encode(y_train, num_classes)
lr = LinearRegression.LinearRegression('ovr')
lr.fit(X_train, y_train_onehot)
y_pred = lr.predict(X_test)
print('Accuracy: {}'.format(lr.accuracy_score(y_test, y_pred)))

Accuracy: 0.8603


## One vs. One Multi-Class Classification

In [9]:
votes = np.zeros((y_test.shape[0], 10))
classifiers = util.generate_classifiers(num_classes)
lr = LinearRegression.LinearRegression('binary')

for classifier in classifiers:
    X_train_temp, y_train_temp = util.generate_binary_dataset(X_train, y_train, classifier[0], classifier[1])
    X_test_temp, y_test_temp = util.generate_binary_dataset(X_test, y_test, classifier[0], classifier[1])
    idxs = util.generate_binary_index(X_test, y_test, classifier[0], classifier[1])
    lr.fit(X_train_temp, y_train_temp)
    y_pred = lr.predict(X_test_temp)

    for pred, idx in zip(y_pred, idxs):
        if pred.any() == 1:
            votes[idx][classifier[0]] += 1
        if pred.any() == -1:
            votes[idx][classifier[1]] += 1

y_pred = np.zeros((y_test.shape[0]))

for i in range(votes.shape[0]):
    y_pred[i] = np.random.choice(np.where(votes[i] == votes[i].max())[0])
print('Accuracy: {}'.format(lr.accuracy_score(y_test, y_pred)))

Accuracy: 0.1005049
