In [None]:
import numpy as np
from scipy.optimize import fmin_cg
import matplotlib.pyplot as plt

In [None]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), evaluated element-wise by NumPy.

    z may be a scalar or an array; the result has the shape np.exp(-z) yields.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
def h(X, theta):
    """Logistic-regression hypothesis: sigmoid of the linear score X . theta."""
    linear_score = X.dot(theta)
    return sigmoid(linear_score)

In [None]:
def cost_function_reg(theta, X, y, reg_lambda):
    """Regularised logistic-regression cost (mean cross-entropy + L2 penalty).

    Parameters
    ----------
    theta : ndarray
        Parameter vector; theta[0] is excluded from the penalty.
    X : ndarray
        Design matrix with one row per sample (X.dot(theta) must be valid).
    y : array-like
        Binary targets (0/1 or boolean), one per sample.
    reg_lambda : float
        Regularisation strength.

    Returns
    -------
    The cost J = -(1/m) * sum(y*log(h) + (1-y)*log(1-h))
                 + reg_lambda/(2m) * sum(theta[1:]**2).
    """
    # Accept boolean / list targets and do the arithmetic in floating point.
    y = np.asarray(y, dtype=float)
    m = len(y)
    z = X.dot(theta)
    # Work with log-sigmoids directly instead of log(sigmoid(z)):
    #   log(h)     = -log(1 + exp(-z))
    #   log(1 - h) = -log(1 + exp(z))
    # np.logaddexp evaluates these without overflow, so a saturated sigmoid
    # no longer produces log(0) = -inf and a nan cost (0 * -inf).
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    data_term = -((1 - y).dot(log_one_minus_h) + y.dot(log_h)) / m
    # np.sum rather than the builtin sum: vectorised reduction of the penalty.
    reg = (reg_lambda / (2 * m)) * np.sum(theta[1:] ** 2)
    return data_term + reg

In [None]:
def gradient_reg(theta, X, y, reg_lambda):
    """Gradient of the regularised logistic-regression cost w.r.t. theta.

    The data term is (h(X, theta) - y) . X / m; the penalty term is
    (reg_lambda / m) * theta with its first entry zeroed, so theta[0]
    is not regularised.
    """
    m = len(y)
    # The multiplication yields a fresh array, so zeroing its first entry
    # leaves the caller's theta untouched.
    penalty = (reg_lambda / m) * theta
    penalty[0] = 0
    residual = h(X, theta) - y
    data_grad = residual.dot(X) / m
    return data_grad + penalty

In [None]:
def one_vs_all(X, y, num_labels, reg_lambda):
    """Fit one regularised logistic-regression classifier per label.

    Labels are taken to be 1..num_labels. For each label c a classifier is
    trained on the binary target (y == c) with fmin_cg, capped at 100
    iterations and started from an all-zero parameter vector.

    X is a 2-D array without an intercept column; a column of ones is
    prepended here. Returns an array of shape (num_labels, n + 1) whose
    row c - 1 holds the parameters learned for label c.
    """
    n_samples, n_features = X.shape
    n_params = n_features + 1
    all_theta = np.zeros((num_labels, n_params))
    X = np.concatenate((np.ones((n_samples, 1)), X), axis=1)
    for row, label in enumerate(range(1, num_labels + 1)):
        fitted = fmin_cg(
            f=cost_function_reg,
            x0=np.zeros((n_params, 1)),
            fprime=gradient_reg,
            args=(X, y == label, reg_lambda),
            maxiter=100,
        )
        all_theta[row, :] = fitted.T

    return all_theta

In [None]:
def predict_one_vs_all(all_theta, X):
    """Predict a 1-based label for every row of X from one-vs-all parameters.

    Parameters
    ----------
    all_theta : array-like, shape (num_labels, n + 1)
        One parameter row per label, intercept first.
    X : array-like, shape (m, n)
        Samples without the intercept column (it is prepended here).

    Returns
    -------
    ndarray of shape (m,) with labels in 1..num_labels.
    """
    X = np.asarray(X)
    m = len(X)
    X = np.hstack((np.ones((m, 1)), X))
    # The sigmoid is monotonic, so the best class is the one with the largest
    # linear score. Ranking the raw scores avoids spurious ties when several
    # sigmoid outputs saturate to exactly 1.0 (or 0.0) in floating point.
    scores = X.dot(np.asarray(all_theta).T)
    # +1 converts the 0-based row index of all_theta into the 1-based label.
    return np.argmax(scores, axis=1) + 1

In [None]:
def plot_random_samples(X):
    """Show 25 randomly picked rows of X as a 5 x 5 grid of 20 x 20 images.

    Rows are drawn with np.random.randint, i.e. with replacement, so the same
    sample can appear more than once. Each row must reshape to 20 x 20; the
    image is transposed before display.
    """
    side = 20
    picked_rows = np.random.randint(X.shape[0], size=25)
    plt.figure(figsize=(5, 5))
    for position, row in enumerate(picked_rows, start=1):
        plt.subplot(5, 5, position)
        # Hide ticks and grid so only the image is visible.
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        image = X[row].reshape(side, side).T
        plt.imshow(image, cmap=plt.cm.binary)
    plt.show()

In [None]:
# Problem dimensions
input_layer_size = 400  # 20 x 20 pixels
num_labels = 10

# Read the training table; its last column is the label, the remaining
# columns are the features.
X = np.loadtxt("ex3data.csv", delimiter=',')
X, y = X[:, :-1], X[:, -1]
m = y.shape[0]

In [None]:
# Visualising some samples: draws a 5 x 5 grid of randomly chosen rows of X.
# The selection is random, so the figure differs between runs.
plot_random_samples(X)

In [None]:
# Testing logistic regression cost function with regularisation on a tiny
# hand-checkable problem: 5 samples, an intercept column and 3 features.
theta_t = np.array([-2, -1, 1, 2])
# Feature values 0.1 .. 1.5, filled column by column. They are built from an
# integer range because np.arange with a float step does not guarantee the
# number of elements, which would make the reshape below fragile.
X_t = np.hstack((
    np.ones((5, 1)),
    np.reshape(np.arange(1, 16) / 10, (3, 5)).T
))
y_t = np.array([1, 0, 1, 0, 1])
lamdba_t = 3  # regularisation strength (identifier spelling kept as-is)

J = cost_function_reg(theta_t, X_t, y_t, lamdba_t)
grad = gradient_reg(theta_t, X_t, y_t, lamdba_t)

# Fail loudly if the implementation drifts from the reference values instead
# of relying on a visual comparison of printed numbers.
np.testing.assert_allclose(J, 2.534819, rtol=0, atol=1e-5)
np.testing.assert_allclose(
    grad, [0.146561, -0.548558, 0.724722, 1.398003], rtol=0, atol=1e-5
)

In [None]:
# Report the computed cost and gradient next to their reference values,
# one item per line.
print(
    "Cost: {}".format(J),
    "Expected cost: 2.534819",
    "Gradients:",
    grad,
    "Expected gradients:",
    "[0.146561, -0.548558, 0.724722, 1.398003]",
    sep="\n",
)

In [None]:
# Training One vs All: fit one regularised classifier per label.
reg_lambda = 0.1
all_theta = one_vs_all(
    X,
    y,
    num_labels=num_labels,
    reg_lambda=reg_lambda,
)

In [None]:
# Predict on the training set itself and report the share of correct labels
# as a percentage.
pred = predict_one_vs_all(all_theta, X)

print('Training Set Accuracy: {}'.format((pred == y).mean() * 100))