In [453]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt
import multiprocessing as mp
import pandas as pd
import scipy.io as sio

In [454]:
input_layer_size  = 400
hidden_layer_size = 25
num_labels = 10
print('Loading and Visualizing Data ...\n')
mat_contents = sio.loadmat('ex4data1.mat')
X = mat_contents['X']
y = mat_contents['y']
m = len(y)
print(m)

rand_indices = np.random.permutation(m)
print(rand_indices)
rand_indices = rand_indices.reshape(-1, 1)
sel = X[rand_indices[0:100, :]]



Loading and Visualizing Data ...

5000
[1645 3102 3293 ... 3535 2104 1476]


In [455]:
def arrangeParams(t1, t2):
    return np.concatenate((t1.reshape(t1.size, 1, order='F'), t2.reshape(t2.size, 1, order='F')), axis=0)

In [456]:
def displayData(X):
    fig, ax = plt.subplots(10,10,sharex=True,sharey=True)
    img_num = 0
    for i in range(10):
        for j in range(10):
            # Convert column vector into 20x20 pixel matrix
            # You have to transpose to display correctly
            img = X[img_num,:].reshape(20,20).T
            ax[i][j].imshow(img,cmap='gray')
            img_num += 1

    return (fig, ax)
    
#figure, ax = displayData(sel)
#figure.show()

In [457]:
print('\nLoading Saved Neural Network Parameters ...\n')
nn_contents = sio.loadmat('ex4weights.mat')

Theta1 = nn_contents['Theta1']
Theta2 = nn_contents['Theta2']

nn_params = arrangeParams(Theta1, Theta2)
print(Theta1.shape)
print(Theta2.shape)
print(Theta1[0][1])
print(Theta2[0][1])


Loading Saved Neural Network Parameters ...

(25, 401)
(10, 26)
-1.0562416310683364e-08
-1.2124449845935756


In [458]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z));

In [459]:
def sigmoidGradient(z):
    return sigmoid(z) * (1-sigmoid(z))

In [460]:
def nnCostFunction(nn_params_l, input_layer_size_l, hidden_layer_size_l, num_labels_l, X_l, y_l, lambda_l):
    #print(input_layer_size_l)
    #print(hidden_layer_size_l)
    #Theta1 = reshape(nn_params_l(1:hidden_layer_size_l * (input_layer_size_l + 1)), hidden_layer_size_l, (input_layer_size_l + 1));
    Theta1_l = nn_params_l[0:(hidden_layer_size_l * (input_layer_size_l + 1))].reshape(input_layer_size_l + 1, hidden_layer_size_l).T
    Theta2_l = nn_params_l[(Theta1_l.size):nn_params_l.size].reshape(hidden_layer_size_l + 1, num_labels_l).T
    #print(Theta1_l[0][1])
    #print(Theta2_l[0][1])
    #print('\nFeedforward Using Neural Network ...\n')
    #print(X_l.shape[0])
    m = X_l.shape[0] #5000X400    
    X_l = np.c_[np.ones((m, 1)), X_l] #5000X401
    z1 = X_l.dot(Theta1_l.T)
    a1 = sigmoid(z1)
    a1 = np.c_[np.ones((m, 1)), a1]
    
    z2=  a1.dot(Theta2_l.T)

    h = sigmoid(z2)
    #print(h[: 2])
    J = 0
    for k in range(1, num_labels_l + 1):
        yk = (y_l==k) * 1
        J = J - (1/m) * np.sum(yk * np.log(h.T[k-1:k].T) + (1-yk) * np.log(1-h.T[k-1:k].T))    
    #print(J)
    
    rtheta1 = np.sum(np.sum(np.square(Theta1_l[:,1:])))
    rtheta2 = np.sum(np.sum(np.square(Theta2_l[:,1:])))
    bias = lambda_l/(2*m)
    
    J= J + (bias * (rtheta1+rtheta2))
    Del1, Del2 = 0, 0
    
    for t in range(m): 
        A1 = X_l[t,:].T.reshape(-1, 1) # all columns with one row at a time
        Z2 = Theta1_l.dot(A1)
        A2 = np.concatenate((np.c_[np.array([1])], sigmoid(Z2).reshape(-1,1)))
        Z3 = Theta2_l.dot(A2)
        H = sigmoid(Z3)
        actual = y_l[t,:].reshape(-1,1)
        yk = np.zeros((num_labels_l,1))
        yk[actual - 1] = 1
        del3 = H - yk;
        del2 = (Theta2_l[:,1:].T.dot(del3)) * sigmoidGradient(Z2).reshape(-1, 1)
        #print(A2.shape)
        Del1 = Del1 + del2.dot(A1.T)
        Del2 = Del2 + del3.dot(A2.T)
    
    #print(np.sum(np.sum(Del2)))
    
    Theta1_grad = (Del1/m) + (lambda_l/m) * np.c_[np.zeros((hidden_layer_size_l,1)), Theta1_l[:,1:]]
    Theta2_grad = (Del2/m) + (lambda_l/m) * np.c_[np.zeros((num_labels_l,1)), Theta2_l[:,1:]]
    return J, arrangeParams(Theta1_grad, Theta2_grad)# np.concatenate((Theta1_grad.reshape(Theta1_grad.size, 1, order='F'), Theta2_grad.reshape(Theta2_grad.size, 1, order='F')), axis=0)
    
#calling    
lambda_val = 0

J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_val)
print(['Cost at parameters (loaded from ex4weights): \n(this value should be about 0.287629)\n'], J);


['Cost at parameters (loaded from ex4weights): \n(this value should be about 0.287629)\n'] 0.2876291651613189


In [461]:
print('\nChecking Cost Function (w/ Regularization) ... \n')
lambda_val = 1

J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_val)
print(['Cost at parameters (loaded from ex4weights):\n(this value should be about 0.383770)\n'], J)




Checking Cost Function (w/ Regularization) ... 

['Cost at parameters (loaded from ex4weights):\n(this value should be about 0.383770)\n'] 0.38376985909092365


In [462]:
print('\nEvaluating sigmoid gradient...\n')

g = sigmoidGradient(np.array([-1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n  ')
print(g)



Evaluating sigmoid gradient...

Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:
  
[0.19661193 0.23500371 0.25       0.23500371 0.19661193]


In [463]:
def randInitializeWeights(L_in, L_out):
    epsilon_init = 0.12
    return np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init

In [464]:


# ================ Part 6: Initializing Pameters ================
#  In this part of the exercise, you will be starting to implment a two
#  layer neural network that classifies digits. You will start by
#  implementing a function to initialize the weights of the neural network
#  (randInitializeWeights.m)

print('\nInitializing Neural Network Parameters ...\n')
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_nn_params = arrangeParams(initial_Theta1, initial_Theta2)



Initializing Neural Network Parameters ...



In [465]:
def debugInitializeWeights(fan_out, fan_in):
    W = np.zeros((fan_out, 1 + fan_in))
    W = np.sin(np.arange(1, W.size + 1)).reshape(W.shape) / 10
    return W



In [466]:
def computeNumericalGradient(J, aa, theta):

    numgrad = np.zeros(theta.shape)
    perturb = np.zeros(theta.shape)
    e = 0.0001#1e-4
    for p in range(theta.size):
        #Set perturbation vector
        perturb[p] = e;
        loss1 = J(theta - perturb)
        loss2 = J(theta + perturb)
        #Compute Numerical Gradient
        numgrad[p] = (loss2 - loss1) / (2*e)
        perturb[p] = 0
    

In [467]:
#np.exp(0.00001)
1e-4

0.0001

In [468]:
def checkNNGradients(lamb=0):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    X  = debugInitializeWeights(m, input_layer_size - 1)
    y = np.mod([i for i in range(1,m+1) ], num_labels).reshape(-1,1)
    print(Theta1.shape)
    nn_params = np.concatenate((Theta1.T.reshape(Theta1.size,1), Theta2.reshape(Theta2.size,1)))

    
    cost, grad = nnCostFunction(nn_params,input_layer_size,hidden_layer_size, num_labels, X, y, lamb)
    
    def reduced_cost_func(p):
        #print("--------------------------------------")
        #print(p)
        #print("--------------------------------------")
        return nnCostFunction(p,input_layer_size,hidden_layer_size,num_labels,X,y,lamb)[0]
     
    numgrad = computeNumericalGradient(reduced_cost_func, 1, nn_params)

    #print(numgrad, grad)

    return


In [469]:
# print('\nChecking Backpropagation... \n');

# #Check gradients by running checkNNGradients
checkNNGradients()

(5, 4)


In [470]:
print('\nChecking Backpropagation (w/ Regularization) ... \n')
lambda_val = 3
checkNNGradients(lambda_val)
debug_J, debug_grad  = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_val);

print(['\n\nCost at (fixed) debugging parameters (w/ lambda = %f): \n(for lambda = 3, this value should be about 0.576051)\n\n'], lambda_val, debug_J)





Checking Backpropagation (w/ Regularization) ... 

(5, 4)
['\n\nCost at (fixed) debugging parameters (w/ lambda = %f): \n(for lambda = 3, this value should be about 0.576051)\n\n'] 3 (0.5760512469501331, array([[ 6.18712766e-05],
       [ 9.38798109e-05],
       [-1.92593606e-04],
       ...,
       [ 1.34904586e-05],
       [ 7.79237711e-05],
       [-2.31823790e-05]]))


In [471]:
a

array([[0.14887099, 0.29848163, 0.09591432, 0.68609031],
       [0.84853923, 0.04476457, 0.3494197 , 0.14859526],
       [0.3907762 , 0.51925247, 0.40387419, 0.46743337],
       [0.82952177, 0.15120962, 0.80575269, 0.05956335]])

In [472]:
#print(a[:,2:3])
a[3,:]

array([0.82952177, 0.15120962, 0.80575269, 0.05956335])

In [473]:
(hidden_layer_size * (input_layer_size + 1))

10025