In [5]:
%matplotlib inline
import scipy.io as io
import random
import scipy.optimize as op
from mpl_toolkits import mplot3d
import scipy.ndimage
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize']=(20.0,10.0)

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).  Python scalars, lists and arrays of any shape are
        accepted.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Sigmoid of every element, with the same shape as ``z``.
    """
    z = np.asarray(z)
    # Integer input is promoted to float first: negating an unsigned integer
    # array wraps around (e.g. uint8 1 -> 255) and would give wrong results.
    if not np.issubdtype(z.dtype, np.inexact):
        z = z.astype(float)
    return 1 / (1 + np.exp(-z))

def nnCostFunction(nn_params, X, y, lambda1, input_layer_size, hidden_layer_size, num_labels):
    """Regularised cross-entropy cost of a 3-layer (one hidden layer) network.

    Parameters
    ----------
    nn_params : array_like
        Unrolled weights: the first ``(input_layer_size + 1) * hidden_layer_size``
        entries are Theta1 (row-major), the remainder is Theta2.
    X : numpy.ndarray, shape (m, input_layer_size)
        Training examples, one per row, without the bias column.
    y : array_like, shape (m,) or (m, 1)
        Integer class labels in ``0 .. num_labels - 1``.  A label outside that
        range matches no class and is treated as an all-zero target row.
    lambda1 : float
        Regularisation strength; bias columns are never regularised.
    input_layer_size, hidden_layer_size, num_labels : int
        Layer sizes used to reshape ``nn_params``.

    Returns
    -------
    float
        The scalar cost (a scalar is what ``scipy.optimize`` minimisers expect
        from an objective function).
    """
    nn_params = np.ravel(nn_params)
    split = (input_layer_size + 1) * hidden_layer_size
    Theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1)

    m = X.shape[0]
    bias = np.ones((m, 1))

    # Forward propagation, prepending the bias unit before each layer.
    a1 = np.concatenate((bias, X), axis=1)
    a2 = np.concatenate((bias, sigmoid(np.dot(a1, Theta1.T))), axis=1)
    h = sigmoid(np.dot(a2, Theta2.T))

    # One-hot targets via broadcasting: row i has a 1 in column y[i].
    Y = (np.ravel(y)[:, None] == np.arange(num_labels)).astype(float)

    data_cost = -np.sum(Y * np.log(h) + (1 - Y) * np.log(1 - h)) / m

    # Column 0 of each Theta holds the bias weights and is excluded.
    reg_cost = (np.sum(Theta1[:, 1:] ** 2) + np.sum(Theta2[:, 1:] ** 2)) * lambda1 / (2 * m)

    return float(data_cost + reg_cost)
    
def sigmoidGradient(z):
    """Return the derivative of the logistic sigmoid at ``z``, element-wise.

    Computes ``g * (1 - g)`` where ``g = 1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).  Python scalars, lists and arrays of any shape are
        accepted.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Gradient of the sigmoid for every element, same shape as ``z``.
    """
    z = np.asarray(z)
    # Integer input is promoted to float first: negating an unsigned integer
    # array wraps around and would give wrong results.
    if not np.issubdtype(z.dtype, np.inexact):
        z = z.astype(float)
    g = 1 / (1 + np.exp(-z))
    return g * (1 - g)

def randInitializeWeights(L_in, L_out, epsilon_init=0.12):
    """Randomly initialise the weights of one layer to break symmetry.

    Parameters
    ----------
    L_in : int
        Number of incoming connections (bias unit not counted).
    L_out : int
        Number of outgoing connections.
    epsilon_init : float, optional
        Half-width of the sampling interval.  Defaults to 0.12.

    Returns
    -------
    numpy.ndarray, shape (L_out, 1 + L_in)
        Weights drawn uniformly from ``[-epsilon_init, epsilon_init)``; the
        extra column holds the bias weights.
    """
    # np.random.random samples from [0, 1); scale and shift to the target range.
    return np.random.random((L_out, 1 + L_in)) * 2 * epsilon_init - epsilon_init

def grad(nn_params, X, y, lambda1, input_layer_size, hidden_layer_size, num_labels):
    """Gradient of the regularised network cost, computed by backpropagation.

    All training examples are processed at once with matrix products instead
    of one Python-level iteration per example.

    Parameters
    ----------
    nn_params : array_like
        Unrolled weights: the first ``(input_layer_size + 1) * hidden_layer_size``
        entries are Theta1 (row-major), the remainder is Theta2.
    X : numpy.ndarray, shape (m, input_layer_size)
        Training examples, one per row, without the bias column.
    y : array_like, shape (m,) or (m, 1)
        Integer class labels in ``0 .. num_labels - 1``.  A label outside that
        range matches no class and yields an all-zero target row, the same
        encoding ``nnCostFunction`` uses.
    lambda1 : float
        Regularisation strength; bias columns are never regularised.
    input_layer_size, hidden_layer_size, num_labels : int
        Layer sizes used to reshape ``nn_params``.

    Returns
    -------
    numpy.ndarray, 1-D
        Unrolled gradient, laid out exactly like ``nn_params``
        (Theta1 gradient followed by Theta2 gradient).
    """
    nn_params = np.ravel(nn_params)
    split = (input_layer_size + 1) * hidden_layer_size
    Theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1)

    m = X.shape[0]
    bias = np.ones((m, 1))

    # Forward pass for every example (rows are examples).
    a1 = np.concatenate((bias, X), axis=1)             # (m, input + 1)
    z2 = np.dot(a1, Theta1.T)                          # (m, hidden)
    a2 = np.concatenate((bias, sigmoid(z2)), axis=1)   # (m, hidden + 1)
    a3 = sigmoid(np.dot(a2, Theta2.T))                 # (m, labels)

    # One-hot targets via broadcasting: row i has a 1 in column y[i].
    Y = (np.ravel(y)[:, None] == np.arange(num_labels)).astype(float)

    # Backward pass; the bias column of Theta2 carries no error backwards.
    delta3 = a3 - Y                                              # (m, labels)
    delta2 = np.dot(delta3, Theta2[:, 1:]) * sigmoidGradient(z2)  # (m, hidden)

    # Summing the per-example outer products is a single matrix product.
    Theta1_grad = np.dot(delta2.T, a1) / m
    Theta2_grad = np.dot(delta3.T, a2) / m

    # Regularise every weight except the bias column (column 0).
    Theta1_grad[:, 1:] += Theta1[:, 1:] * lambda1 / m
    Theta2_grad[:, 1:] += Theta2[:, 1:] * lambda1 / m

    return np.concatenate((Theta1_grad.ravel(), Theta2_grad.ravel()))

# Driver script: check the cost function against pre-trained weights, train a
# network from random weights with conjugate gradient, and report the
# accuracy on the training set.

# Layer sizes of the network (inputs, hidden units, output classes).
input_layer_size  = 400
hidden_layer_size = 25
num_labels = 10

# Load the training data; m = number of rows of X, n = number of columns.
mat = io.loadmat('ex4data1.mat')
X=mat['X']
y=mat['y']
[m, n] = X.shape

# Load pre-trained weights, used only for the cost-function checks below.
Theta = io.loadmat('ex4weights.mat')
Theta1=Theta['Theta1']
Theta2=Theta['Theta2']

# Unroll both weight matrices into the single vector nnCostFunction expects.
nn_params = np.append(Theta1.flatten(),Theta2.flatten())

# lambda1 = 0 makes the regularisation term of the cost vanish.
lambda1=0

# Shift the labels down by one: nnCostFunction compares y against
# 0 .. num_labels-1.
# NOTE(review): presumably the file stores labels as 1 .. num_labels - confirm.
y=y-1

J = nnCostFunction(nn_params, X, y, lambda1, input_layer_size, hidden_layer_size, num_labels)
print('\nCost at parameters (loaded from ex4weights):(this value should be about 0.287629)=\n', J)

# Repeat the check with regularisation switched on.
lambda1 = 3
J = nnCostFunction(nn_params, X, y, lambda1, input_layer_size, hidden_layer_size, num_labels)
print('\nCost at parameters (loaded from ex4weights):(this value should be about 0.576051)=\n', J)

# lambda1 = 1 is also the value still in effect for the training run below.
lambda1 = 1
J = nnCostFunction(nn_params, X, y, lambda1, input_layer_size, hidden_layer_size, num_labels)
print('\nCost at parameters (loaded from ex4weights):(this value should be about 0.383770)=\n', J)

# Quick check of sigmoidGradient on a few sample points.
g = sigmoidGradient(np.array([-1,-0.5,0,0.5,1]))
print('\nSigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:')
print(g)
print('\n\n')
       
# Random starting weights for training, unrolled into one column vector.
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_nn_params = np.append(initial_Theta1.flatten(),initial_Theta2.flatten()).reshape(-1,1)

# Minimise the cost with conjugate gradient, capped at 150 iterations;
# full_output=True is what makes the call return the five values unpacked here.
[xopt, fopt, func_calls, grad_calls, warnflg]=op.fmin_cg(f=nnCostFunction, x0=initial_nn_params, fprime=grad, args=(X,y,lambda1,input_layer_size,hidden_layer_size,num_labels), maxiter=150, disp=True, full_output=True)
# Split the optimised vector back into the two weight matrices
# (this rebinds Theta1/Theta2, replacing the weights loaded from file).
xopt=xopt.reshape(-1,1)
Theta1 = xopt[:((input_layer_size+1) * hidden_layer_size)].reshape(hidden_layer_size,input_layer_size+1)
Theta2 = xopt[((input_layer_size +1)* hidden_layer_size ):].reshape(num_labels,hidden_layer_size+1)

# Forward pass with the trained weights to predict every training example.
# NOTE: this rebinds the global X to a copy with a leading column of ones.
X = np.concatenate((np.ones((m,1)),X),axis=1)
# Placeholder only; p is overwritten by the argmax a few lines below.
p = np.zeros((m, 1))
h1 = sigmoid(np.dot(X,Theta1.T))
h1= np.concatenate((np.ones((m,1)),h1),axis=1)
h2 = sigmoid(np.dot(h1,Theta2.T))
# Predicted class = index of the largest output unit, as a column vector.
p=np.argmax(h2, axis=1).reshape(-1,1)
# p becomes 1 where the prediction equals the label and 0 elsewhere.
p=(p==y)*1
print('\nTraining Set Accuracy:', np.mean(p) * 100)


Cost at parameters (loaded from ex4weights):(this value should be about 0.287629)=
 [0.28762917]

Cost at parameters (loaded from ex4weights):(this value should be about 0.576051)=
 [0.57605125]

Cost at parameters (loaded from ex4weights):(this value should be about 0.383770)=
 [0.38376986]

Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:
[0.19661193 0.23500371 0.25       0.23500371 0.19661193]



         Current function value: 0.347010
         Iterations: 150
         Function evaluations: 323
         Gradient evaluations: 323

Training Set Accuracy: 98.94
