In [1]:
from scipy.io import loadmat
import numpy as np
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt

# reading the data
data = loadmat('ex4data1.mat')
X = data['X']
y = data['y']

# visualizing the data
_, axarr = plt.subplots(10,10,figsize=(10,10))
for i in range(10):
    for j in range(10):
       axarr[i,j].imshow(X[np.random.randint(X.shape[0])].\
reshape((20,20), order = 'F'))          
       axarr[i,j].axis('off')

In [2]:
weights = loadmat('ex4weights.mat')
theta1 = weights['Theta1']    #Theta1 has size 25 x 401
theta2 = weights['Theta2']    #Theta2 has size 10 x 26

nn_params = np.hstack((theta1.ravel(order='F'), theta2.ravel(order='F')))    #unroll parameters

# neural network hyperparameters
input_layer_size = 400
hidden_layer_size = 25
num_labels = 10
lmbda = 0

In [3]:
def sigmoid(z):
    return 1/(1+np.exp(-z))
def sigmoidGrad(z):
    return np.multiply(sigmoid(z), 1-sigmoid(z))
def randInitializeWeights(L_in, L_out):
    epsilon = 0.12
    return np.random.rand(L_out, L_in+1) * 2 * epsilon - epsilon

def nnCostFunc(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda):
    theta1 = np.reshape(nn_params[:hidden_layer_size*(input_layer_size+1)], (hidden_layer_size, input_layer_size+1), 'F')
    theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):], (num_labels, hidden_layer_size+1), 'F')

    m = len(y)
    ones = np.ones((m,1))
    a1 = np.hstack((ones, X))
    a2 = sigmoid(a1 @ theta1.T)
    a2 = np.hstack((ones, a2))
    h = sigmoid(a2 @ theta2.T)
    
    y_d = pd.get_dummies(y.flatten())
    
    temp1 = np.multiply(y_d, np.log(h))
    temp2 = np.multiply(1-y_d, np.log(1-h))
    temp3 = np.sum(temp1 + temp2)
    print(np.power(theta2[:,1:],2))
    
    sum1 = np.sum(np.sum(np.power(theta1[:,1:],2), axis = 1))
    sum2 = np.sum(np.sum(np.power(theta2[:,1:],2), axis = 1))
    
    return np.sum(temp3 / (-m)) + (sum1 + sum2) * lmbda / (2*m)
def nnGrad(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda):
    
    initial_theta1 = np.reshape(nn_params[:hidden_layer_size*(input_layer_size+1)], (hidden_layer_size, input_layer_size+1), 'F')
    initial_theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):], (num_labels, hidden_layer_size+1), 'F')
    y_d = pd.get_dummies(y.flatten())
    delta1 = np.zeros(initial_theta1.shape)
    delta2 = np.zeros(initial_theta2.shape)
    m = len(y)

    for i in range(X.shape[0]):
        ones = np.ones(1)
        a1 = np.hstack((ones, X[i]))
        z2 = a1 @ initial_theta1.T
        a2 = np.hstack((ones, sigmoid(z2)))
        z3 = a2 @ initial_theta2.T
        a3 = sigmoid(z3)
        
        d3 = a3 - y_d.iloc[i,:][np.newaxis,:]


        z2 = np.hstack((ones, z2))
        d2 = np.multiply(initial_theta2.T @ d3.T, sigmoidGrad(z2).T[:,np.newaxis])

        delta1 = delta1 + d2[1:,:] @ a1[np.newaxis,:]
        delta2 = delta2 + d3.T @ a2[np.newaxis,:]
    delta1 /= m
    delta2 /= m
    print(delta1.shape, delta2.shape)
    delta1[:,1:] = delta1[:,1:] + initial_theta1[:,1:] * lmbda / m
    delta2[:,1:] = delta2[:,1:] + initial_theta2[:,1:] * lmbda / m
        
    return np.hstack((delta1.ravel(order='F'), delta2.ravel(order='F')))

In [4]:
initial_theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_theta2 = randInitializeWeights(hidden_layer_size, num_labels)

# unrolling parameters into a single column vector
nn_initial_params = np.hstack((initial_theta1.ravel(order='F'), initial_theta2.ravel(order='F')))

cost = nnCostFunc(nn_initial_params, input_layer_size, hidden_layer_size, num_labels, X, y, 1)
nn_backprop_Params = nnGrad(nn_initial_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda)

print(cost)
#print(nn_backprop_Params)
#print(np.shape(nn_backprop_Params))

[[4.36560746e-03 1.13970196e-02 1.30150071e-03 6.64639401e-03
  7.84122320e-03 1.29425870e-04 1.36567722e-02 2.37484521e-04
  8.01986448e-03 1.00999877e-02 7.96671262e-04 8.20331272e-03
  1.96771836e-03 2.00514519e-03 1.36899024e-02 1.43801858e-04
  7.29757350e-04 8.09067594e-05 1.05452960e-02 5.27346902e-03
  4.20519751e-04 1.46798147e-08 1.28585900e-03 6.28217892e-03
  5.27088219e-04]
 [5.81632071e-05 5.80381410e-03 6.25640797e-03 8.83095724e-03
  6.97553972e-03 1.59821739e-05 8.18105279e-03 1.04940958e-02
  5.85067862e-03 3.55180761e-03 6.27129393e-04 2.38973878e-03
  1.54395936e-03 1.16478481e-02 9.35122533e-03 7.22389985e-04
  5.65951696e-03 6.32831919e-03 1.31148936e-02 5.45541906e-04
  1.17169583e-03 1.31888013e-02 1.17357568e-02 3.76525695e-05
  5.67893051e-03]
 [7.71451695e-04 1.07498334e-02 6.17960881e-06 3.81961538e-04
  1.35930374e-02 9.40613239e-04 9.23168249e-04 1.31306441e-02
  7.91430849e-03 4.69318825e-03 2.44785953e-03 3.93499016e-03
  2.88739272e-03 8.16743675e-03 6.