In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import scipy.io

In [2]:
# Read the exercise data set. mat_dtype=True asks loadmat to hand the arrays back
# in the dtype MATLAB itself would load them as.
data = scipy.io.loadmat('ex3data1.mat', mat_dtype=True)
x = data['X']
y = data['y'].ravel()  # labels flattened to a 1-D vector
M = y.shape[0]         # number of labels (one per example)

# Design matrix: a row of ones stacked on top of x transposed, giving M columns.
X = np.vstack((np.ones(M), x.T))

N = X.shape[0]          # rows of X: the ones row plus the rows of x.T
K = np.unique(y).size   # number of distinct label values

# Vectorizing regularized logistic regression

In [3]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)); numpy arrays are handled element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

## Vectorizing the cost function

In [4]:
def costFunctionReg(X, y, M, N, theta, lambda_):
    """Regularized logistic-regression cost and gradient for one parameter vector.

    Parameters
    ----------
    X : ndarray
        Design matrix whose rows line up with the entries of ``theta`` and whose
        columns line up with the entries of ``y``.
    y : ndarray
        1-D vector of targets (0/1 in the callers of this notebook).
    M : int
        Divisor used to average the cost and the gradient (the number of examples).
    N : int
        Not used by the computation; kept so existing call sites keep working.
    theta : ndarray
        1-D parameter vector. ``theta[0]`` is left out of the regularization term.
    lambda_ : float
        Regularization strength.

    Returns
    -------
    J : float
        Cross-entropy cost plus ``lambda_ / (2 * M)`` times the squared norm of
        ``theta[1:]``.
    grad : ndarray
        Gradient of ``J`` with respect to ``theta``; same length as ``theta``.
    """
    h = sigmoid(theta.T.dot(X))

    cross_entropy = -(np.log(h).dot(y) + np.log(1 - h).dot(1 - y)) / M
    J = cross_entropy + lambda_ / (2 * M) * theta[1:].dot(theta[1:])

    # Only theta[1:] is penalized. Zero the first entry of a copy in one step
    # instead of rebuilding the vector element by element in a Python loop.
    penalized = theta.copy()
    penalized[0] = 0

    grad = X.dot(h - y) / M + (lambda_ / M) * penalized
    return J, grad

In [16]:
# Small hand-checkable problem for exercising costFunctionReg.
M_test = 5
lambda_test = 3
theta_test = np.array([-2., -1., 1., 2.])
y_test = np.array([1., 0., 1., 0., 1.])

# A row of ones on top of the values 0.1, 0.2, ..., 1.5 laid out as a 3 x 5 grid.
X_test = np.vstack((np.ones(M_test), np.arange(1, 16).reshape(3, 5) / 10))

In [6]:
# N is passed explicitly as the number of rows of X_test rather than IPython's `_`
# (last-output) variable, whose value depends on which cell happened to run before
# this one and which does not exist outside IPython.
J, grad = costFunctionReg(X_test, y_test, M_test, X_test.shape[0], theta_test, lambda_test)
J, grad

(2.534819396109744,
 array([ 0.14656137, -0.54855841,  0.72472227,  1.39800296]))

## One-vs-all Classification

In [7]:
# Training settings read by oneVsAll and forwarded to gradientDescent.
alpha = 0.3        # step size applied to each gradient
lambda_ = 0.1      # regularization strength
iterations = 5000  # gradient-descent steps per classifier

In [8]:
def gradientDescent(X, y, M, N, initial_theta, alpha, lambda_, iterations):
    """Take a fixed number of gradient-descent steps and return the final parameters.

    Works on a copy of ``initial_theta``, so the caller's array is left untouched.
    On each of the ``iterations`` steps the gradient reported by ``costFunctionReg``
    is scaled by ``alpha`` and subtracted from the current parameters. ``X``, ``y``,
    ``M``, ``N`` and ``lambda_`` are forwarded to ``costFunctionReg`` unchanged.
    """
    weights = initial_theta.copy()

    for _step in range(iterations):
        _cost, gradient = costFunctionReg(X, y, M, N, weights, lambda_)
        weights -= alpha * gradient

    return weights

In [9]:
def oneVsAll(X, y, K):
    """Train K one-vs-all regularized logistic-regression classifiers.

    For each label ``k`` in ``1..K`` the targets are binarized as ``y == k`` and a
    parameter vector is fitted with ``gradientDescent``, starting from zeros.

    Parameters
    ----------
    X : ndarray
        2-D design matrix; its first dimension sets the length of each parameter
        vector and its second dimension is used as the number of examples.
    y : ndarray
        Label vector compared against ``1..K``.
    K : int
        Number of classifiers to train.

    Returns
    -------
    ndarray
        The fitted parameter vectors stacked in label order (row ``k - 1`` belongs
        to label ``k``).

    Notes
    -----
    ``alpha``, ``lambda_`` and ``iterations`` are read from the notebook-level
    variables of the same names.
    """
    # Take the problem size from the matrix that was passed in rather than from the
    # notebook-level N / M, so the function is correct for any X it is given.
    N, M = X.shape

    theta_0 = np.zeros(N)
    all_theta = [
        gradientDescent(X, (y == k).astype(int), M, N, theta_0, alpha, lambda_, iterations)
        for k in range(1, K + 1)
    ]
    return np.array(all_theta)

In [10]:
# Fit one classifier per label; row k - 1 of all_theta holds the parameters for label k.
all_theta = oneVsAll(X=X, y=y, K=K)

In [11]:
def predict(X, initial_theta):
    """Apply ``sigmoid`` to the product of ``X`` transposed and ``initial_theta`` transposed.

    With ``X`` holding one example per column and ``initial_theta`` one parameter
    vector per row, the result has one row per example and one column per
    parameter vector.
    """
    scores = np.dot(X.T, initial_theta.T)
    return sigmoid(scores)

In [12]:
# Score every column of X against every fitted parameter vector.
predictions = predict(X=X, initial_theta=all_theta)

In [13]:
def accuracy(predictions, y):
    """Fraction of rows whose highest-scoring column, counted from 1, equals ``y``.

    ``predictions`` is 2-D with one row per example; the index of the largest
    entry in each row is shifted by one before being compared with ``y``.
    """
    best_column = np.argmax(predictions, axis=1)
    hits = (best_column + 1) == y
    return hits.mean()

In [14]:
# Share of examples whose top-scoring classifier matches the label in y. The
# predictions come from the same X the classifiers were fitted on.
accuracy(predictions=predictions, y=y)

0.935