In [17]:
import numpy as np
from scipy.optimize import minimize
from scipy.io import loadmat
from math import sqrt

In [None]:
import os
os.chdir("/Users/vishnoo/Downloads/Assignment2/basecode")

In [18]:
def initializeWeights(n_in, n_out):
    """
    # initializeWeights return the random weights for Neural Network given the
    # number of node in the input layer and output layer

    # Input:
    # n_in: number of nodes of the input layer
    # n_out: number of nodes of the output layer
       
    # Output: 
    # W: matrix of random initial weights with size (n_out x (n_in + 1))"""

    epsilon = sqrt(6) / sqrt(n_in + n_out + 1)
    W = (np.random.rand(n_out, n_in + 1) * 2 * epsilon) - epsilon
    return W

In [19]:
def sigmoid(z):
    """# Notice that z can be a scalar, a vector or a matrix
    # return the sigmoid of input z"""

    return  1/(1 + np.exp(-z))

In [25]:
def preprocess():
    """ Input:
     Although this function doesn't have any input, you are required to load
     the MNIST data set from file 'mnist_all.mat'.

     Output:
     train_data: matrix of training set. Each row of train_data contains 
       feature vector of a image
     train_label: vector of label corresponding to each image in the training
       set
     validation_data: matrix of training set. Each row of validation_data 
       contains feature vector of a image
     validation_label: vector of label corresponding to each image in the 
       training set
     test_data: matrix of training set. Each row of test_data contains 
       feature vector of a image
     test_label: vector of label corresponding to each image in the testing
       set

     Some suggestions for preprocessing step:
     - feature selection"""

    mat = loadmat('mnist_all.mat')  # loads the MAT object as a Dictionary

    # Split the training sets into two sets of 50000 randomly sampled training examples and 10000 validation examples. 
    # Your code here.
     # initializing arrays with zeros#
    train_pp = np.zeros(shape=(50000, 784))
    validation_pp = np.zeros(shape=(10000, 784))
    test_pp = np.zeros(shape=(10000, 784))
    train_label_pp = np.zeros(shape=(50000,))
    validation_label_pp = np.zeros(shape=(10000,))
    test_label_pp = np.zeros(shape=(10000,))
    #initializing variables#
    train_length = 0
    validation_length = 0
    test_length = 0
    train_label_length = 0
    validation_label_length = 0
    #splitting data into six arrays#
    for key in mat:
        if "train" in key:
            label = key[-1]
            tup = mat.get(key)
            sap = range(tup.shape[0])
            tup_permutation = np.random.permutation(sap)
            tup_length = len(tup)  
            tag_length = tup_length - 1000
            #data to training set#
            train_pp[train_length:train_length + tag_length] = tup[tup_permutation[1000:], :]
            train_length += tag_length
            train_label_pp[train_label_length:train_label_length + tag_length] = label
            train_label_length += tag_length
            #data to validation set#
            validation_pp[validation_length:validation_length + 1000] = tup[tup_permutation[0:1000], :]
            validation_length += 1000

            validation_label_pp[validation_label_length:validation_label_length + 1000] = label
            validation_label_length += 1000

        elif "test" in key:
            label = key[-1]
            tup = mat.get(key)
            sap = range(tup.shape[0])
            tup_permutation = np.random.permutation(sap)
            tup_length = len(tup)
            test_label_pp[test_length:test_length + tup_length] = label
            test_pp[test_length:test_length + tup_length] = tup[tup_permutation]
            test_length += tup_length
           
    train_size = range(train_pp.shape[0])
    train_permutation = np.random.permutation(train_size)
    train_data = train_pp[train_permutation]
    train_data = np.double(train_data)
    train_data = train_data / 255.0
    train_label = train_label_pp[train_permutation]

    validation_size = range(validation_pp.shape[0])
    validation_permutation = np.random.permutation(validation_size)
    validation_data = validation_pp[validation_permutation]
    validation_data = np.double(validation_data)
    validation_data = validation_data / 255.0
    validation_label = validation_label_pp[validation_permutation]

    test_size = range(test_pp.shape[0])
    test_permutation = np.random.permutation(test_size)
    test_data = test_pp[test_permutation]
    test_data = np.double(test_data)
    test_data = test_data / 255.0
    test_label = test_label_pp[test_permutation]


# Feature selection
    boolean_matrix = np.equal(train_data[0, :], train_data)
    boolean_m2 = np.all(boolean_matrix, axis = 0)
    length_result = boolean_m2.shape[0]
    columnsToDelete = np.where(boolean_m2 == True)
    global selected_features
    selected_features = np.where(boolean_m2 == False)
    train_data_2 = np.delete(train_data,columnsToDelete[0],1)
    validation_data_2 = np.delete(validation_data,columnsToDelete[0],1)
    test_data_2 = np.delete(test_data, columnsToDelete[0], 1)
    train_data = train_data_2
    validation_data = validation_data_2
    test_data = test_data_2


    print('preprocess done')

    return train_data, train_label, validation_data, validation_label, test_data, test_label

In [29]:
def nnObjFunction(params, *args):
    """% nnObjFunction computes the value of objective function (negative log 
    %   likelihood error function with regularization) given the parameters 
    %   of Neural Networks, thetraining data, their corresponding training 
    %   labels and lambda - regularization hyper-parameter.

    % Input:
    % params: vector of weights of 2 matrices w1 (weights of connections from
    %     input layer to hidden layer) and w2 (weights of connections from
    %     hidden layer to output layer) where all of the weights are contained
    %     in a single vector.
    % n_input: number of node in input layer (not include the bias node)
    % n_hidden: number of node in hidden layer (not include the bias node)
    % n_class: number of node in output layer (number of classes in
    %     classification problem
    % training_data: matrix of training data. Each row of this matrix
    %     represents the feature vector of a particular image
    % training_label: the vector of truth label of training images. Each entry
    %     in the vector represents the truth label of its corresponding image.
    % lambda: regularization hyper-parameter. This value is used for fixing the
    %     overfitting problem.
       
    % Output: 
    % obj_val: a scalar value representing value of error function
    % obj_grad: a SINGLE vector of gradient value of error function
    % NOTE: how to compute obj_grad
    % Use backpropagation algorithm to compute the gradient of error function
    % for each weights in weight matrices.

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % reshape 'params' vector into 2 matrices of weight w1 and w2
    % w1: matrix of weights of connections from input layer to hidden layers.
    %     w1(i, j) represents the weight of connection from unit j in input 
    %     layer to unit i in hidden layer.
    % w2: matrix of weights of connections from hidden layer to output layers.
    %     w2(i, j) represents the weight of connection from unit j in hidden 
    %     layer to unit i in output layer."""

    n_input, n_hidden, n_class, training_data, training_label, lambdaval = args

    w1 = params[0:n_hidden * (n_input + 1)].reshape((n_hidden, (n_input + 1)))
    w2 = params[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))
    obj_val = 0

    # Your code here
    training_data = np.append(training_data, np.ones((training_data.shape[0],1)), 1)

    zj = np.dot(training_data, np.transpose(w1))
    zj = sigmoid(zj)
    zj = np.append(zj, np.ones((zj.shape[0], 1)),1)
    ol = np.dot(zj, np.transpose(w2))
    ol = sigmoid(ol)
    ol_log = np.log(ol)

    label_mod = np.zeros((training_data.shape[0],n_class))

    for i in range(training_label.shape[0]):
        index = int(training_label[i])
        label_mod[i][index] = 1


    result_a = np.add(np.multiply(label_mod, ol_log), np.multiply(np.subtract(1,label_mod), np.log(np.subtract(1, ol))))
    result_a = np.divide(np.sum(result_a),(-1)*training_data.shape[0])
    result_b = (np.sum(np.square(w1)) + np.sum(np.square(w2))) * np.divide(lambdaval,(2*training_data.shape[0]))
    obj_val = result_a + result_b

    #reckon w2
    grad_w2 = np.divide(np.add(np.dot(np.transpose(np.subtract(ol,label_mod)), zj), np.multiply(lambdaval,w2)),training_data.shape[0])
    new_w2 = w2[:,0:w2.shape[1]-1]
    zj = zj[:,0:zj.shape[1]-1]
    t1 = np.multiply(np.multiply(np.subtract(1,zj),zj),np.dot(np.subtract(ol,label_mod),new_w2))
    grad_w1 = np.add(np.dot(t1.T,training_data),np.multiply(lambdaval,w1))/training_data.shape[0]

    # Make sure you reshape the gradient matrices to a 1D array. for instance if your gradient matrices are grad_w1 and grad_w2
    # you would use code similar to the one below to create a flat array
    obj_grad = np.concatenate((grad_w1.flatten(), grad_w2.flatten()),0)
    #obj_grad = np.array([])

    return (obj_val, obj_grad)


In [30]:
def nnPredict(w1, w2, data):
    """% nnPredict predicts the label of data given the parameter w1, w2 of Neural
    % Network.

    % Input:
    % w1: matrix of weights of connections from input layer to hidden layers.
    %     w1(i, j) represents the weight of connection from unit i in input 
    %     layer to unit j in hidden layer.
    % w2: matrix of weights of connections from hidden layer to output layers.
    %     w2(i, j) represents the weight of connection from unit i in input 
    %     layer to unit j in hidden layer.
    % data: matrix of data. Each row of this matrix represents the feature 
    %       vector of a particular image
       
    % Output: 
    % label: a column vector of predicted labels"""

    labels = np.array([])
    # Your code here
    bias = np.append(data, np.ones((data.shape[0],1)), 1)
    zj = np.dot(bias, np.transpose(w1))
    zj = sigmoid(zj)
    zj = np.append(zj, np.ones((zj.shape[0],1)), 1)
    ol = np.dot(zj, np.transpose(w2))
    ol = sigmoid(ol)
    labels = np.argmax(ol,axis=1)

    return labels

In [26]:
train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()

preprocess done


In [40]:
#  Train Neural Network

# set the number of nodes in input unit (not including bias unit)
n_input = train_data.shape[1]
for j in range(30,51,10):  # 20 - 100 hidden layers with a step of 10
    for k in range(0,51,5): # 0 - 50    lambda with a step of 5
        # set the number of nodes in hidden unit (not including bias unit)
        n_hidden = j

        # set the number of nodes in output unit
        n_class = 100

        # initialize the weights into some random matrices
        initial_w1 = initializeWeights(n_input, n_hidden)
        initial_w2 = initializeWeights(n_hidden, n_class)

        # unroll 2 weight matrices into single column vector
        initialWeights = np.concatenate((initial_w1.flatten(), initial_w2.flatten()), 0)

        # set the regularization hyper-parameter
        lambdaval = k

        args = (n_input, n_hidden, n_class, train_data, train_label, lambdaval)

        # Train Neural Network using fmin_cg or minimize from scipy,optimize module. Check documentation for a working example

        opts = {'maxiter': 100}  # Preferred value.

        nn_params = minimize(nnObjFunction, initialWeights, jac=True, args=args, method='CG', options=opts)

        # In Case you want to use fmin_cg, you may have to split the nnObjectFunction to two functions nnObjFunctionVal
        # and nnObjGradient. Check documentation for this function before you proceed.
        # nn_params, cost = fmin_cg(nnObjFunctionVal, initialWeights, nnObjGradient,args = args, maxiter = 50)


        # Reshape nnParams from 1D vector into w1 and w2 matrices
        w1 = nn_params.x[0:n_hidden * (n_input + 1)].reshape((n_hidden, (n_input + 1)))
        w2 = nn_params.x[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))
        
        print(str(n_hidden),end='\t')
        print(str(lambdaval),end='\t')
        
        # Test the computed parameters

        predicted_label = nnPredict(w1, w2, train_data)

        # find the accuracy on Training Dataset

        print('\n Training set Accuracy:' + str(100 * np.mean((predicted_label == train_label).astype(float))) + '%')

        predicted_label = nnPredict(w1, w2, validation_data)

        # find the accur acy on Validation Dataset

        print('\n Validation set Accuracy:' + str(100 * np.mean((predicted_label == validation_label).astype(float))) + '%')

        predicted_label = nnPredict(w1, w2, test_data)

        # find the accuracy on Validation Dataset

        print('\n Test set Accuracy:' + str(100 * np.mean((predicted_label == test_label).astype(float))) + '%')

30	0	
 Training set Accuracy:96.974%

 Validation set Accuracy:95.87%

 Test set Accuracy:95.82000000000001%
30	5	
 Training set Accuracy:96.78999999999999%

 Validation set Accuracy:95.76%

 Test set Accuracy:95.72%
30	10	
 Training set Accuracy:96.406%

 Validation set Accuracy:95.54%

 Test set Accuracy:95.55%
30	15	
 Training set Accuracy:96.286%

 Validation set Accuracy:95.52000000000001%

 Test set Accuracy:95.45%
30	20	
 Training set Accuracy:96.02000000000001%

 Validation set Accuracy:95.49%

 Test set Accuracy:95.35%
30	25	
 Training set Accuracy:95.49600000000001%

 Validation set Accuracy:95.09%

 Test set Accuracy:95.03%
30	30	
 Training set Accuracy:95.28%

 Validation set Accuracy:94.74000000000001%

 Test set Accuracy:95.12%
30	35	
 Training set Accuracy:94.948%

 Validation set Accuracy:94.52000000000001%

 Test set Accuracy:94.66%
30	40	
 Training set Accuracy:94.994%

 Validation set Accuracy:94.69999999999999%

 Test set Accuracy:94.92%
30	45	
 Training set Accurac