# One-Vs-All Approach or Multiclass Logistic Regression

In [367]:
from scipy.io import loadmat
from matplotlib import pyplot as plt
import numpy as np
import scipy.optimize as opt
from numpy import *

# Load the modified mnist dataset of 20px x 20px 
dataset = loadmat('/Users/admin/Desktop/machine-learning-ex/ex3/ex3data1.mat')

# Features
X = dataset['X']

# Labels
y = dataset['y']

# Merge the dataset to shuffle
Z = np.hstack((X, y))

# Shuffle the dataset to randomize the data
np.random.shuffle(Z)

# Split into training and testing set
train_x = Z[:3750,:400] 

train_y = Z[:3750:,400]
train_y = train_y.reshape(train_y.shape[0],1)

test_x = Z[3750:,:400]

test_y = Z[3750:,400]
test_y = test_y.reshape(test_y.shape[0],1)

# Insert additional feature x0 of all 1's
on_s = np.ones((train_x.shape[0],1))
train_x = np.hstack((on_s, train_x))

on_s2 = np.ones((test_x.shape[0],1))
test_x = np.hstack((on_s2, test_x))

# Parameters
theta = np.zeros((train_x.shape[1],1))

# Number of distinct labels
num_classes = 10

def sigmoid(z):
    '''
    Function to compute the sigmoid or logistic function
    
    Parameters:
        z (ndarray): Theta(transpose) times X, where X is the array of features and theta 
        is the array of parameters to be optimized.
        
    Returns:
        hypothesis (ndarray): The values of z after applying the sigmoid function.
    '''
    hypothesis = 1.0 / (1.0 + np.exp(np.dot(-1,z)))
    
    return hypothesis

def computeCost(theta, X, y, lmbda):
    '''
    Function to compute the regularized cost function
    
    Parameters:
        theta (ndarray): The array of parameters to be optimized.
        X (ndarray): The array of features.
        y (ndarray): The array of labels.
        lmbda (int): The regularization parameter lambda.
        
    Returns:
        J (ndarray): The computed regularized cost.
    '''
    
    m = X.shape[0]
    
    hypothesis = sigmoid(np.dot(X,theta)).reshape(y.shape[0],1)
    
    J = (-1/m * (np.dot(y.T, ma.log(hypothesis)) + np.dot((1 - y.T),ma.log(1 - hypothesis))))
    + (lmbda/(2*m) * np.sum((theta[1:] ** 2),axis = 0))
         
    return J.reshape(1,1)

def computeGradients(theta, X, y, lmbda):
    '''
    Function to compute the regularized gradients
    
    Parameters:
        theta (ndarray): The array of parameters to be optimized.
        X (ndarray): The array of features.
        y (ndarray): The array of labels.
        lmbda (int): The regularization parameter lambda.
        
    Returns:
        grad (ndarray): The computed regularized gradients.
    '''
    
    m = X.shape[0]
    
    hypothesis = sigmoid(np.dot(X,theta)).reshape(y.shape[0],1)
    
    theta[0] = 0
    
    grad = (1/m * np.dot(X.T,(hypothesis - y))).reshape(theta.shape[0],1) 
    + np.dot(lmbda/m,theta).reshape(theta.shape[0],1) 
    
    return grad.reshape(theta.shape[0],1)

def oneVsAll(X, y, num_classes, lmbda=0.1):
    '''
    Function to compute the optimal parameters for each label/class
    
    Parameters:
        X (ndarray): The array of features.
        y (ndarray): The array of labels.
        num_classes (int): The total number of distinct labels.
        lmbda (int): The regularization parameter lambda.
        
    Returns:
        all_theta (ndarray): The computed optimal parameters for each label.
    '''

    all_theta = np.zeros((num_classes, X.shape[1]))

    for i in range(num_classes):
        
        initial_theta = np.zeros((X.shape[1],1))
        result = opt.fmin_tnc(func=computeCost, x0=initial_theta, fprime=computeGradients, 
                              args=(X, (y == i + 1), lmbda),maxfun=50)
        all_theta[i,:] = result[0]
    
    return all_theta
        
def predict(all_theta, X):
    '''
    Function to predict the labels of unseen data
    
    Parameters:
        all_theta (ndarray): The computed optimal parameters for each label.
        X (ndarray): The array of features.
    
    Returns:
        prediction (ndarray): The array of predicted labels. 
    '''
    
    prediction = (np.argmax(np.dot(X, all_theta.T), axis=1) + 1).reshape(X.shape[0],1)
    
    return prediction

all_theta = oneVsAll(train_x, train_y, num_classes, 0.1)
p = predict(all_theta, test_x)

correct = float(np.sum(p == test_y).ravel())

# Total number of observations
n = test_y.shape[0]

# The accuracy of the model
acc = correct/n * 100
    
print(f"The accuracy is {str(round(acc,2))} %")

The accuracy is 89.84 %


# Neural Network Approach:

In [365]:
from scipy.io import loadmat
from matplotlib import pyplot as plt
import numpy as np

# Load the modified mnist dataset of 20px x 20px 
dataset = loadmat('/Users/admin/Desktop/machine-learning-ex/ex3/ex3data1.mat')

# Features
X = dataset['X']

# Labels
y = dataset['y']

# Insert additional feature x0 of all 1's
on_s = np.ones((X.shape[0],1))
X = np.hstack((on_s,X))

# Load the optimized parameters for the modified mnist dataset of 20px x 20px 
dataset2 = loadmat('/Users/admin/Desktop/machine-learning-ex/ex3/ex3weights.mat')

theta1 = dataset2['Theta1']

theta2 = dataset2['Theta2']


def neural_network(theta1, theta2, X):
    
    '''
    Function that defines the architecture of the neural network with 400 nodes in the input layer, 
    25 nodes in the hidden layer and 10 nodes in the output layer
    
    Parameters:
        theta1 (ndarray): The array of optimized weights to compute the first hidden layer.
        theta2 (ndarray): The array of optimized weights to compute the output.
        X (ndarray): The array of features.

    Returns:
        prediction (ndarray): The array of predicted labels. 
    '''
    
    hidden_layer = sigmoid(np.dot(X, theta1.T))
    
    hidden_layer = np.hstack((on_s, hidden_layer))
    
    output_layer = sigmoid(np.dot(hidden_layer, theta2.T))
    
    prediction = (np.argmax(output_layer,axis = 1) + 1).reshape(X.shape[0],1)
    
    return prediction

    
p = neural_network(theta1, theta2, X)

correct = float(np.sum(p == y).ravel())

# Total number of observations
n = y.shape[0]

# The accuracy of the model
acc = correct/n * 100
    
print(f"The accuracy is {str(round(acc,2))} %")

The accuracy is 97.52 %
