# Neural Networks in Python

## One-vs-all Classification

Setup the parameters for this part of the exercise

In [1]:
# 20x20 Input Images of Digits
# 10 labels, from 0 to 9
input_layer_size = 400
num_labels = 10

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

%matplotlib inline

Loading and Visualizing Data

In [3]:
# Load Training Data
print 'Loading and Visualizing Data...'

data = loadmat('datasets/ex3data1.mat') # training data stored in arrays X, y
X = data['X']
y = data['y']
print X.shape, y.shape

Loading and Visualizing Data...
(5000, 400) (5000, 1)


### Sample from the MNIST Dataset
![MNIST Sample](https://www.filepicker.io/api/file/cx0w731pSwa3xIMyGKIu)

Replace all '10's in the y array with '0'

In [4]:
print np.unique(y)
y_replaced = y
np.place(y_replaced, y == 10, [0])
print np.unique(y_replaced)

[ 1  2  3  4  5  6  7  8  9 10]
[0 1 2 3 4 5 6 7 8 9]


Define functions for vectorized logistic regression in one-vs-all classification

In [5]:
# All functions from EX2

def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g

def computeLogisticParts(theta, X, y):
    h = sigmoid(np.dot(X, theta))
    pos = np.vdot(y, np.log(h)) # computes summation also
    neg = np.vdot((1-y), np.log(1-h)) # computes summation also
    
    return h, pos, neg

def cost(theta, X, y):
    m, n = X.shape
    h, pos, neg = computeLogisticParts(theta, X, y)

    J = (pos + neg) / (-m)
    return J

def gradient(theta, X, y):
    m, n = X.shape
    h, pos, neg = computeLogisticParts(theta, X, y)

    grad = np.dot((h - y), X) / m
    return grad

def costReg(theta, X, y, reg_lambda):
    m, n = X.shape
    temp_theta = theta
    temp_theta[0] = 0
    J = cost(theta, X, y)
    J = J + (reg_lambda / (2 * m)) * (np.vdot(temp_theta, temp_theta))
    return J

def gradientReg(theta, X, y, reg_lambda):
    m, n = X.shape
    temp_theta = theta
    temp_theta[0] = 0
    grad = gradient(theta, X, y)
    grad = grad + (reg_lambda / m) * temp_theta
    return grad

In [6]:
from scipy.optimize import minimize

def oneVsAll(X, y, num_labels, reg_lambda):
    m, n = X.shape
    all_theta = np.zeros(( num_labels, n+1 ))

    # Add ones to the X data matrix
    X_ones = np.column_stack(( np.ones(m), X ))
    print 'X_ones is ', X_ones.shape
    
    init_theta = np.zeros(n+1)

    for i in range(num_labels):
        y_i = np.array([1 if label == i else 0 for label in y])
        # [theta] = fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), initial_theta, options);
        #res = fmin_tnc(func=costReg, x0=init_theta, fprime=gradientReg, args=(X_ones, y_i, reg_lambda))
        res = minimize(method='SLSQP', fun=costReg, x0=init_theta, jac=gradientReg, args=(X_ones, y_i, reg_lambda))
        theta_reg = res.x
        print 'Class', i, 'cost = ', costReg(theta_reg, X_ones, y_i, reg_lambda)
        all_theta[i,:] = theta_reg
    
    return all_theta

In [7]:
reg_lambda = 0.1
all_theta = oneVsAll(X, y, num_labels, reg_lambda)

X_ones is  (5000, 401)
Class 0 cost =  0.0378434412349
Class 1 cost =  0.0276097817247
Class 2 cost =  0.0876765208075
Class 3 cost =  0.0938895639083
Class 4 cost =  0.0502609669125
Class 5 cost =  0.061479507141
Class 6 cost =  0.0363146199082
Class 7 cost =  0.0442682440829
Class 8 cost =  0.169855632643
Class 9 cost =  0.116566352125


In [8]:
def predictOneVsAll(all_theta, X):
    m, n = X.shape
    # Add ones to the X data matrix
    X_ones = np.column_stack(( np.ones(m), X ))
    p = sigmoid(np.dot(X_ones, all_theta.T))
    p_max = np.argmax(p, axis=1)
    return p_max
    #return [1 if p >= 0.5 else 0 for p in probabilities]

In [9]:
# Compute accuracy on our training set
p = zip(predictOneVsAll(all_theta, X), y)
results = [1 if a == b else 0 for (a, b) in p]
print 'Train Accuracy: ', float(sum(results)) / float(len(results))

Train Accuracy:  0.9234


## Neural Networks

In [10]:
hidden_layer_size = 25 # 25 hidden units

Load Parameters theta1 and theta2

In [11]:
weights = loadmat('datasets/ex3weights.mat')
theta1 = weights['Theta1']
theta2 = weights['Theta2']
print theta1.shape, theta2.shape

(25, 401) (10, 26)


Predict classes based on pre-computed theta1 and theta2

In [12]:
m, n = X.shape
# already defined, but num_labels = theta2.shape[0]

# Input Layer
a1 = np.column_stack(( np.ones(m), X )) # a1 -> (5000, 401)

# Hidden Layer
z2 = np.dot(theta1, a1.T) # z2 -> (25, 5000)
a2_sig = sigmoid(z2) # a2_sig -> (25, 5000)
a2 = np.vstack(( np.ones(a2_sig.shape[1]), a2_sig )) # a2 -> (26, 5000)

# Output layer
z3 = np.dot(theta2, a2) # z3 -> (10, 5000)
a3 = sigmoid(z3).T # a3 -> (10, 5000) transposed to (5000, 10)
print "Output layer size: ", a3.shape

Output layer size:  (5000, 10)


In [13]:
# Compute accuracy of the given thetas on our neural network

pred_nn = np.argmax(a3, axis=1)

# Correct the index shift in the prediction (nn features are 1,2,...,9,0 but argmax gives 0,1,...,8,9)
pred_nn_inc = pred_nn + 1
pred_nn_corrected = pred_nn_inc
np.place(pred_nn_corrected, pred_nn_inc == 10, [0])

p_nn = zip(pred_nn_corrected, y.reshape(-1))
results_nn = [1 if a == b else 0 for (a, b) in p_nn]
print 'Train Accuracy: ', float(sum(results_nn)) / float(len(results_nn))

Train Accuracy:  0.9752
