In [15]:
import numpy as np
import pickle
from scipy.optimize import minimize
from scipy.io import loadmat
from math import sqrt

In [None]:
import os
# NOTE(review): machine-specific absolute path -- os.chdir raises on any
# machine where this directory does not exist. preprocess() opens
# 'face_all.pickle' relative to the working directory, which is presumably
# why the directory is changed here; prefer a configurable data directory.
os.chdir("/Users/vishnoo/Downloads/Assignment2/basecode")

In [16]:
def initializeWeights(n_in,n_out):
    """Draw a random initial weight matrix for one fully connected layer.

    Entries are sampled uniformly from [-epsilon, epsilon), where
    epsilon = sqrt(6) / sqrt(n_in + n_out + 1).

    Input:
    n_in: number of nodes of the input layer
    n_out: number of nodes of the output layer

    Output:
    W: matrix of random initial weights with size (n_out x (n_in + 1))
    """
    bound = sqrt(6) / sqrt(n_in + n_out + 1)
    # np.random.rand yields values in [0, 1); stretch and shift them so the
    # result is centred on zero.
    unit_samples = np.random.rand(n_out, n_in + 1)
    return (unit_samples * 2 * bound) - bound


In [17]:
def sigmoid(z):
    """Apply the logistic function 1 / (1 + exp(-z)) element-wise.

    z can be a scalar, a vector or a matrix; for array input the result has
    the same shape as z.
    """
    decay = np.exp(-z)
    return 1 / (1 + decay)

In [18]:
def nnObjFunction(params, *args):
    """Regularized negative log-likelihood of a one-hidden-layer sigmoid
    network, together with its gradient (computed by backpropagation).

    Input:
    params: 1-D vector holding all weights: first w1 (input -> hidden),
        flattened row by row, then w2 (hidden -> output).
    args: the tuple (n_input, n_hidden, n_class, training_data,
        training_label, lambdaval) where
        n_input: number of nodes in the input layer (bias node excluded)
        n_hidden: number of nodes in the hidden layer (bias node excluded)
        n_class: number of nodes in the output layer (number of classes)
        training_data: matrix of training data, one feature vector per row
        training_label: vector of true labels, one integer class index
            (0 .. n_class - 1) per row of training_data
        lambdaval: regularization hyper-parameter

    Output:
    obj_val: scalar value of the error function
    obj_grad: a SINGLE 1-D vector with the gradient of the error function,
        laid out exactly like params (grad of w1, then grad of w2)

    Weight layout:
    w1 has shape (n_hidden, n_input + 1); w1[i, j] is the weight from input
        unit j to hidden unit i.
    w2 has shape (n_class, n_hidden + 1); w2[i, j] is the weight from hidden
        unit j to output unit i.
    In both matrices the LAST column multiplies the bias unit, because the
    column of ones is appended after the real activations.
    """
    n_input, n_hidden, n_class, training_data, training_label, lambdaval = args

    split = n_hidden * (n_input + 1)
    w1 = params[0:split].reshape((n_hidden, n_input + 1))
    w2 = params[split:].reshape((n_class, n_hidden + 1))

    n_samples = training_data.shape[0]
    bias_col = np.ones((n_samples, 1))

    # ---- Forward pass ----------------------------------------------------
    inputs = np.append(training_data, bias_col, 1)
    hidden = sigmoid(np.dot(inputs, np.transpose(w1)))
    hidden_b = np.append(hidden, bias_col, 1)
    out_act = np.dot(hidden_b, np.transpose(w2))   # output pre-activations
    ol = sigmoid(out_act)

    # ---- 1-of-K encoding of the labels (vectorized) ------------------------
    label_idx = np.asarray(training_label).astype(int).ravel()
    label_mod = np.zeros((n_samples, n_class))
    label_mod[np.arange(label_idx.shape[0]), label_idx] = 1

    # ---- Objective ---------------------------------------------------------
    # log(sigmoid(a))     = -log(1 + exp(-a)) = -logaddexp(0, -a)
    # log(1 - sigmoid(a)) = -log(1 + exp(a))  = -logaddexp(0,  a)
    # Working on the pre-activations keeps both terms finite even when the
    # sigmoid saturates to exactly 0 or 1; np.log(ol) / np.log(1 - ol) would
    # give -inf there and 0 * -inf would turn the whole objective into NaN.
    log_p = -np.logaddexp(0, -out_act)
    log_not_p = -np.logaddexp(0, out_act)
    log_likelihood = np.sum(label_mod * log_p + (1 - label_mod) * log_not_p)
    data_term = -log_likelihood / n_samples
    reg_term = (np.sum(np.square(w1)) + np.sum(np.square(w2))) * np.divide(lambdaval, 2 * n_samples)
    obj_val = data_term + reg_term

    # ---- Backpropagation ---------------------------------------------------
    delta_out = ol - label_mod
    grad_w2 = (np.dot(np.transpose(delta_out), hidden_b) + lambdaval * w2) / n_samples
    # The hidden bias unit has no incoming weights, so drop the bias column
    # of w2 when propagating the error back to the hidden layer.
    delta_hidden = (1 - hidden) * hidden * np.dot(delta_out, w2[:, :-1])
    grad_w1 = (np.dot(np.transpose(delta_hidden), inputs) + lambdaval * w1) / n_samples

    # Flatten in the same order as params so the optimizer can consume it.
    obj_grad = np.concatenate((grad_w1.flatten(), grad_w2.flatten()), 0)

    return (obj_val, obj_grad)

In [19]:
def nnPredict(w1, w2, data):
    """Predict the label of every row of data with the network (w1, w2).

    Input:
    w1: matrix of weights of connections from input layer to hidden layer,
        one row per hidden unit; the last column multiplies the bias unit.
    w2: matrix of weights of connections from hidden layer to output layer,
        one row per output unit; the last column multiplies the bias unit.
    data: matrix of data. Each row of this matrix represents the feature
        vector of a particular example.

    Output:
    labels: 1-D array holding, for each row of data, the index of the output
        unit with the largest activation.
    """

    def _feed_layer(activations, weights):
        # Append the bias unit (a trailing column of ones), then apply the
        # layer's weights followed by the sigmoid non-linearity.
        ones_col = np.ones((activations.shape[0], 1))
        with_bias = np.append(activations, ones_col, 1)
        return sigmoid(np.dot(with_bias, np.transpose(weights)))

    hidden_out = _feed_layer(data, w1)
    class_scores = _feed_layer(hidden_out, w2)
    return np.argmax(class_scores, axis=1)

In [20]:
def preprocess(pickle_path='face_all.pickle', train_end=21100, valid_end=23765):
    """Load the pickled data set and split it into train / validation / test.

    The pickle must contain a dict with the keys 'Features' and 'Labels'.
    Features are divided by 255; row 0 of 'Labels' is used as the label
    vector (presumably 'Labels' is stored as a (1, n_samples) array --
    TODO confirm against the data file).

    Input:
    pickle_path: path of the pickle file to read.
    train_end: rows [0, train_end) form the training set.
    valid_end: rows [train_end, valid_end) form the validation set and rows
        [valid_end, end) form the test set.
    The defaults reproduce the original hard-coded file name and split.

    Output:
    train_x, train_y, valid_x, valid_y, test_x, test_y
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file -- only load data files from a trusted source.
    with open(pickle_path, 'rb') as pickle_file:  # closes the handle deterministically
        pickle_obj = pickle.load(pickle_file)

    features = pickle_obj['Features']
    labels = pickle_obj['Labels'][0]

    train_x = features[0:train_end] / 255
    valid_x = features[train_end:valid_end] / 255
    test_x = features[valid_end:] / 255

    train_y = labels[0:train_end]
    valid_y = labels[train_end:valid_end]
    test_y = labels[valid_end:]
    return train_x, train_y, valid_x, valid_y, test_x, test_y

In [27]:
"""**************Neural Network Script Starts here********************************"""
train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()
#  Train Neural Network
# set the number of nodes in input unit (not including bias unit)
n_input = train_data.shape[1]
# Grid search: train one network per (hidden-layer size, lambda) pair and
# print its accuracy on the training, validation and test sets.
# NOTE(review): no random seed is set in the visible cells, so the initial
# weights -- and therefore the printed accuracies -- differ from run to run.
for j in range(30,51,10):  # number of hidden units: 30, 40, 50 (not including bias unit)
    for k in range(0,31,5): # regularization lambda: 0, 5, ..., 30
        n_hidden = j
        # set the number of nodes in output unit
        n_class = 2

        # initialize the weights into some random matrices
        initial_w1 = initializeWeights(n_input, n_hidden);
        initial_w2 = initializeWeights(n_hidden, n_class);
        # unroll 2 weight matrices into a single 1-D vector (w1 first, then w2)
        initialWeights = np.concatenate((initial_w1.flatten(), initial_w2.flatten()),0)
        # set the regularization hyper-parameter
        lambdaval = k;
        args = (n_input, n_hidden, n_class, train_data, train_label, lambdaval)

        # Train the network with conjugate gradient via scipy.optimize.minimize,
        # capped at 100 iterations.
        opts = {'maxiter' :100}    # Preferred value.

        # jac=True: nnObjFunction returns the pair (objective value, gradient).
        nn_params = minimize(nnObjFunction, initialWeights, jac=True, args=args,method='CG', options=opts)
        params = nn_params.get('x')
        # Reshape the optimized 1-D vector back into the w1 and w2 matrices
        w1 = params[0:n_hidden * (n_input + 1)].reshape( (n_hidden, (n_input + 1)))
        w2 = params[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))
        # Print the hyper-parameters of this run: hidden units, then lambda
        print(str(n_hidden),end='\t')
        print(str(lambdaval),end='\t')
        #Test the computed parameters
        predicted_label = nnPredict(w1,w2,train_data)
        #find the accuracy on Training Dataset
        print('\n Training set Accuracy:' + str(100*np.mean((predicted_label == train_label).astype(float))) + '%')
        predicted_label = nnPredict(w1,w2,validation_data)
        #find the accuracy on Validation Dataset
        print('\n Validation set Accuracy:' + str(100*np.mean((predicted_label == validation_label).astype(float))) + '%')
        predicted_label = nnPredict(w1,w2,test_data)
        #find the accuracy on Test Dataset
        print('\n Test set Accuracy:' +  str(100*np.mean((predicted_label == test_label).astype(float))) + '%')

30	0	
 Training set Accuracy:87.90521327014218%

 Validation set Accuracy:87.4671669793621%

 Test set Accuracy:87.69871309613929%
30	5	
 Training set Accuracy:88.54502369668246%

 Validation set Accuracy:87.6923076923077%

 Test set Accuracy:88.38001514004542%
30	10	
 Training set Accuracy:88.3175355450237%

 Validation set Accuracy:87.6547842401501%

 Test set Accuracy:88.11506434519303%
30	15	
 Training set Accuracy:88.82464454976304%

 Validation set Accuracy:87.7673545966229%

 Test set Accuracy:88.6449659348978%
30	20	
 Training set Accuracy:88.60189573459716%

 Validation set Accuracy:87.7298311444653%

 Test set Accuracy:88.19076457229372%
30	25	
 Training set Accuracy:88.13270142180095%

 Validation set Accuracy:87.3545966228893%

 Test set Accuracy:87.66086298258895%
30	30	
 Training set Accuracy:88.56872037914691%

 Validation set Accuracy:87.6547842401501%

 Test set Accuracy:88.41786525359576%
40	0	
 Training set Accuracy:88.41706161137441%

 Validation set Accuracy:88.255