In [43]:
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  
from scipy.io import loadmat  
from scipy.optimize import minimize
%matplotlib inline

data = loadmat('Data/ex3data1.mat')  

X = np.insert(data['X'], 0, values=np.ones(data['X'].shape[0]), axis=1)

In [44]:
def sigmoid(z):  
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y, learningRate):  
    first = np.dot(-y, np.log(sigmoid(np.dot(X, theta))))
    second = np.dot((1 - y), np.log(1 - sigmoid(np.dot(X, theta))))
    reg = (learningRate / 2 * len(X)) * np.sum(np.square(theta[1:]))
    return np.sum(first - second) / (len(X)) + reg

def gradient(theta, X, y, learningRate):  
    error = sigmoid(np.dot(X, theta)) - y 
    grad = np.add(np.dot(X.T,error), np.concatenate((np.array([0]), learningRate * theta[1:]))) / len(X)
    return grad

In [89]:
def one_vs_all(X, y, num_labels, learning_rate):  
    rows = X.shape[0]
    params = X.shape[1]

    # k X (n + 1) array for the parameters of each of the k classifiers
    all_theta = np.zeros((num_labels, params))

    # labels are 1-indexed instead of 0-indexed
    for i in range(1, num_labels + 1):
        theta = np.zeros(params)
        yi = np.reshape(y==i, y.shape[0])
        
        # minimize the objective function
        fmin = minimize(fun=cost, x0=theta, args=(X, yi, learning_rate), method='TNC', jac=gradient)
        all_theta[i-1,:] = fmin.x

    return all_theta

In [91]:
def predict_all(X, all_theta):  
    rows = X.shape[0]
    params = X.shape[1]
    num_labels = all_theta.shape[0]

    # convert to matrices
    X = np.matrix(X)
    all_theta = np.matrix(all_theta)

    # compute the class probability for each class on each training instance
    h = sigmoid(X * all_theta.T)

    # create array of the index with the maximum probability
    h_argmax = np.argmax(h, axis=1)

    # because our array was zero-indexed we need to add one for the true label prediction
    h_argmax = h_argmax + 1

    return h_argmax

In [97]:
all_theta = one_vs_all(X, data['y'], 10, 1)  
y_pred = predict_all(X, all_theta)  
correct = [1 if a == b else 0 for (a, b) in zip(y_pred, data['y'])]  
accuracy = (sum(map(int, correct)) / float(len(correct)))  
print('accuracy = {0}%'.format(accuracy * 100))



accuracy = 74.6%
