# Programming Exercise 4: Neural Networks Learning

In [62]:
# Importing the needed libraries
# import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from neuralNetwork import *

# Optimization module in scipy
#from scipy import optimize

# We'll use loadmat to load the MATLAB dataset
from scipy.io import loadmat

# tells matplotlib to embed plots within the notebook
%matplotlib inline

In [79]:
# Network architecture: 400 input pixels (20x20 digit images), a single
# hidden layer of 25 units, and 10 output labels (digits 0 through 9).
input_layer_size, hidden_layer_size, num_labels = 400, 25, 10

# The whole network is described by one vector of layer sizes:
#   nnDef.size -> number of layers
#   nnDef[i]   -> number of neurons in layer i
nnDef = np.array((input_layer_size, hidden_layer_size, num_labels))

In [80]:
# Read the MATLAB training set into a dictionary
data = loadmat('ex4data1.mat')

# Pull out the example matrix and the label vector (flattened to 1-D)
X, y = data['X'], data['y'].flatten()

# m = number of training examples
# n = number of features
m, n = X.shape

# The label vector y stores the digit "0" as label 10, because MATLAB arrays
# start at 1. Map label 10 back to 0 so every label equals the digit it
# stands for.
y[y == 10] = 0

In [81]:
# Theta is an object array with one slot per layer; slot i holds the weight
# matrix applied to the activations of layer i. Slot 0 is left at 0.
Theta = np.zeros(nnDef.size, dtype=object)

# Pre-trained weights used to initialize Theta
thetaMat = loadmat('ex4weights.mat')
Theta[1] = thetaMat['Theta1']

# The weight file was saved using MATLAB's labelling, where the digit 0 is
# represented by label 10, so its weights sit in the last row of Theta2.
# Rotating the rows down by one moves that last row to the front, which lines
# row k of Theta[2] up with digit k under the 0-based labels used in Python.
Theta[2] = np.roll(thetaMat['Theta2'], 1, axis=0)

In [106]:
# NNCOSTFUNCTION Implements the neural network cost function for the
# neural network which performs classification
#   J, grad = nnCostFunction(Theta, nnDef, X, y, lmbd) 
# computes the cost and gradient of the neural network

def nnCostFunction(Theta, nnDef, X, y, lmbd):
    """Regularized cost and gradient of a feed-forward sigmoid network.

    Layers are numbered from 1 (input) to num_layers (output), matching the
    notation used for Theta.

    Parameters
    ----------
    Theta : sequence indexed by layer
        Theta[i], for i = 1 .. num_layers - 1, is the weight matrix that maps
        layer i (bias column included) to layer i + 1. Its shape is
        (nnDef[i], nnDef[i - 1] + 1). Theta[0] is never read.
    nnDef : numpy.ndarray
        Layer sizes, input layer first; nnDef[-1] is the number of labels.
    X : numpy.ndarray, shape (m, n)
        One training example per row.
    y : array-like of m integer labels
        Label k marks output unit k, with k in range(nnDef[-1]).
    lmbd : float
        Regularization strength. Bias weights (first column of every
        Theta[i]) are not regularized.

    Returns
    -------
    J : float
        Cross-entropy cost plus the L2 penalty.
    grad : numpy.ndarray of dtype object, len(Theta) entries
        grad[i] is the derivative of J with respect to Theta[i] and has the
        same shape; grad[0] is left at 0.

    Notes
    -----
    ``sigmoid`` is not defined here; it is expected to be in scope already
    (the notebook does ``from neuralNetwork import *``).
    """
    m = X.shape[0]                     # number of training examples
    num_labels = nnDef[-1]             # output layer units
    num_layers = nnDef.size            # number of layers including the input layer

    # a[i] holds the activations of layer i (index 0 is unused so that the
    # indices follow the 1-based layer notation).
    a = np.zeros((num_layers + 1,), dtype=object)
    a[1] = X

    # One-hot encode the labels: y10[r, k] is 1 when example r has label k.
    y10 = (np.asarray(y).reshape(-1, 1) == np.arange(num_labels)).astype(float)

    # Forward propagation. Each layer gets its bias column prepended before
    # being pushed through the next weight matrix; np.hstack builds a new
    # array, so the caller's X is left untouched.
    for i in range(1, num_layers):
        a[i] = np.hstack((np.ones((a[i].shape[0], 1)), a[i]))
        a[i + 1] = sigmoid(np.dot(a[i], Theta[i].T))

    h = a[num_layers]                  # network output, shape (m, num_labels)

    # Cross-entropy cost
    J = (-1 / m) * np.sum(y10 * np.log(h) + (1 - y10) * np.log(1 - h))
    # L2 penalty on every weight except the bias column
    for i in range(1, num_layers):
        J = J + (lmbd / (2 * m)) * np.sum(np.power(Theta[i][:, 1:], 2))

    # Backpropagation
    grad = np.zeros((len(Theta),), dtype=object)
    delta = np.zeros((num_layers + 1,), dtype=object)

    delta[num_layers] = h - y10
    for i in reversed(range(2, num_layers)):
        # (m, s_next) . (s_next, s_i + 1) -> (m, s_i + 1); a[i] still carries
        # its bias column here, so the shapes agree element-wise.
        delta[i] = np.dot(delta[i + 1], Theta[i]) * (a[i] * (1 - a[i]))
        # The bias unit has no incoming weights: drop its column.
        delta[i] = delta[i][:, 1:]

    # Averaged gradient plus the regularization term (zero for bias weights)
    for i in range(1, num_layers):
        penalty = (lmbd / m) * np.hstack(
            (np.zeros((Theta[i].shape[0], 1)), Theta[i][:, 1:]))
        grad[i] = (1 / m) * np.dot(delta[i + 1].T, a[i]) + penalty

    return J, grad
    

In [120]:
# Sanity check of the layer order visited by the gradient loop in nnCostFunction.
# num_layers only exists inside that function, so the layer count is taken
# from nnDef here; otherwise this cell fails on a fresh kernel.
for i in reversed(range(1, nnDef.size)):
    print(i)

2
1


In [118]:
# Display the first-layer weight matrix (the [:,:] slice selects every row and column)
Theta[1][:,:]

array([[-2.25623899e-02, -1.05624163e-08,  2.19414684e-09, ...,
        -1.30529929e-05, -5.04175101e-06,  2.80464449e-09],
       [-9.83811294e-02,  7.66168682e-09, -9.75873689e-09, ...,
        -5.60134007e-05,  2.00940969e-07,  3.54422854e-09],
       [ 1.16156052e-01, -8.77654466e-09,  8.16037764e-09, ...,
        -1.20951657e-04, -2.33669661e-06, -7.50668099e-09],
       ...,
       [-1.83220638e-01, -8.89272060e-09, -9.81968100e-09, ...,
         2.35311186e-05, -3.25484493e-06,  9.02499060e-09],
       [-7.02096331e-01,  3.05178374e-10,  2.56061008e-09, ...,
        -8.61759744e-04,  9.43449909e-05,  3.83761998e-09],
       [-3.50933229e-01,  8.85876862e-09, -6.57515140e-10, ...,
        -1.80365926e-06, -8.14464807e-06,  8.79454531e-09]])

## Compute Cost (Feedforward)

In [108]:
# nnCostFunction returns a (cost, gradient) pair. Unpack it so the float
# format spec below receives the scalar cost instead of a tuple.
# The last argument (0) turns regularization off.
J, grad = nnCostFunction(Theta, nnDef, X, y, 0)

print('Cost at parameters (loaded from ex4weights): {:.6f} \n(this value should be about 0.287629)\n'.format(J))

Cost at parameters (loaded from ex4weights): 0.287629 
(this value should be about 0.287629)



## Implement Regularization

In [84]:
# Regularization strength used for this check
lmbd = 1

# nnCostFunction returns a (cost, gradient) pair. Unpack it so the float
# format spec below receives the scalar cost instead of a tuple.
J, grad = nnCostFunction(Theta, nnDef, X, y, lmbd)

print('Cost at parameters (loaded from ex4weights): {:.6f} \n(this value should be about 0.383770)\n'.format(J))

Cost at parameters (loaded from ex4weights): 0.383770 
(this value should be about 0.383770)

