# COMS 4995_002 Deep Learning Assignment 1
Due on Monday, Oct 9, 11:59pm

This assignment can be done in groups of at most 3 students. Everyone must submit on Courseworks individually.

Write down the UNIs of your group (if applicable)

Member 1: Animesh Anant Sharma, aas2325

Member 2: Himanshu Aggarwal, ha2467

Member 3: Name, UNI

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import glob
import sys
# you shouldn't need to make any more imports

In [2]:
class NeuralNetwork(object):
    """
    Fully-connected ReLU network trained with softmax cross-entropy.

    Stores parameters and training configuration and provides the forward
    pass, the loss, the backward pass and a minibatch gradient-descent loop.

    Conventions used by every method:
      * samples are stored column-wise, so activations are (units, S);
      * weight matrix i has shape (units_in, units_out) and is applied as
        W.T . A; bias i has shape (units_out, 1);
      * the last layer is affine only, the softmax lives in costFunction.
    """
    def __init__(self, layer_dimensions, drop_prob=0.0, reg_lambda=0.0):
        """
        Initializes the weights and biases for each layer
        :param layer_dimensions: (list) number of nodes in each layer,
            including the input and output layers
        :param drop_prob: probability handed to dropout(). NOTE: dropout()
            keeps a unit when rand < prob, so this value acts as the
            probability of KEEPING a unit. Only required in part 2
        :param reg_lambda: L2 regularization strength. Only required in part 2
        :raises ValueError: if fewer than two layer sizes are given
        """
        if len(layer_dimensions) < 2:
            raise ValueError("layer_dimensions needs at least an input and an output size")

        # Fixed seed so that repeated runs produce the same initial weights.
        np.random.seed(1)

        self.parameters = {"weights": [], "biases": []}
        for fan_in, fan_out in zip(layer_dimensions[:-1], layer_dimensions[1:]):
            self.parameters["weights"].append(0.01 * np.random.randn(fan_in, fan_out))
            self.parameters["biases"].append(np.zeros((fan_out, 1)))

        # Number of affine layers. This is a constant of the model; the
        # backward pass must not use it as a loop cursor.
        self.num_layers = len(layer_dimensions) - 1
        self.drop_prob = drop_prob
        self.reg_lambda = reg_lambda
        # 1 while a training forward pass is running (enables dropout), else 0.
        self.training_mode = 0

    def affineForward(self, A, W, b):
        """
        Forward pass for the affine layer.
        :param A: input matrix, shape (L, S), where L is the number of hidden
            units in the previous layer and S is the number of samples
        :param W: weights, shape (L, units_out)
        :param b: biases, shape (units_out, 1), broadcast over the samples
        :returns: the affine product W.T . A + b, shape (units_out, S)
        """
        return np.dot(W.T, A) + b

    def activationForward(self, A, activation="relu"):
        """
        Common interface to access all activation functions.
        :param A: input to the activation function
        :param activation: name of the activation function. Only ReLU is
            implemented; the argument is currently ignored
        :returns: relu(A)
        """
        return self.relu(A)

    def relu(self, X):
        """Element-wise max(0, X)."""
        return np.maximum(0, X)

    def dropout(self, A, prob):
        """
        Inverted dropout.
        :param A: activations to mask
        :param prob: a unit is kept when a uniform sample is < prob, and the
            kept units are rescaled by 1/prob. Despite the attribute name
            ``drop_prob`` this is therefore the KEEP probability; the
            behaviour is left unchanged so existing experiments still
            mean the same thing
        :returns: tuple (A, M)
            WHERE
            A is matrix after applying dropout
            M is dropout mask, used in the backward pass
        """
        M = (np.random.rand(*A.shape) < prob) / prob
        return A * M, M

    def forwardPropagation(self, X):
        """
        Runs an input X through the neural network to compute activations
        for all layers. Returns the output computed at the last layer along
        with the cache required for backpropagation.
        :param X: input samples, one per column
        :returns: (tuple) AL, cache
            WHERE
            AL is the output of the last affine layer (pre-softmax scores)
            cache is a dict of per-layer lists:
                "r_activation"[j] - output of ReLU j (index 0 holds X)
                "d_activation"[j] - input fed to affine layer j, i.e. the
                                    ReLU output after dropout (index 0 holds X)
                "dm"[j-1]         - dropout mask applied to "r_activation"[j],
                                    or None when dropout was not applied
        """
        weights = self.parameters["weights"]
        biases = self.parameters["biases"]
        n_affine = len(weights)

        cache = {
            "d_activation": [None] * n_affine,
            "r_activation": [None] * n_affine,
            "dm": [None] * (n_affine - 1),
        }
        cache["d_activation"][0] = X
        cache["r_activation"][0] = X

        # Dropout is only active during a training forward pass.
        use_dropout = self.drop_prob > 0 and self.training_mode == 1
        for j in range(1, n_affine):
            Z = self.affineForward(cache["d_activation"][j - 1], weights[j - 1], biases[j - 1])
            cache["r_activation"][j] = self.activationForward(Z)
            if use_dropout:
                cache["d_activation"][j], cache["dm"][j - 1] = self.dropout(
                    cache["r_activation"][j], self.drop_prob)
            else:
                cache["d_activation"][j] = cache["r_activation"][j]

        # The output layer has no activation and no dropout.
        AL = self.affineForward(cache["d_activation"][n_affine - 1],
                                weights[n_affine - 1], biases[n_affine - 1])
        return AL, cache

    def costFunction(self, AL, y):
        """
        Softmax cross-entropy loss, plus the L2 penalty when reg_lambda > 0.
        :param AL: output of the last layer, shape (num_classes, S)
        :param y: integer class labels, shape (S)
        :returns cost, dAL: A scalar denoting cost and the gradient of the
            data loss with respect to AL (the L2 term is added to the weight
            gradients in backPropagation)
        """
        num_samples = AL.shape[1]
        cols = np.arange(num_samples)

        # Work in log space and subtract the column-wise maximum first:
        # exp() of a large raw score overflows to inf and turns the loss
        # into nan, while the shifted scores are always <= 0.
        shifted = AL - np.max(AL, axis=0, keepdims=True)
        log_prob = shifted - np.log(np.sum(np.exp(shifted), axis=0, keepdims=True))

        cost = -np.sum(log_prob[y, cols]) / num_samples
        if self.reg_lambda > 0:
            for W in self.parameters["weights"]:
                cost = cost + 0.5 * self.reg_lambda * np.sum(W * W)

        # d(loss)/d(AL) = (softmax - one_hot(y)) / S
        dAL = np.exp(log_prob)
        dAL[y, cols] -= 1
        dAL /= num_samples
        return cost, dAL

    def affineBackward(self, dA_prev, cache, layer=None):
        """
        Backward pass for the affine layer.
        :param dA_prev: gradient on the output of this affine layer
        :param cache: cache returned by forwardPropagation
        :param layer: index of the affine layer; defaults to the last one
        :returns dA: gradient on the input to this layer
                 dW: gradient on the weights
                 db: gradient on the bias
        """
        if layer is None:
            layer = len(self.parameters["weights"]) - 1
        dA = np.dot(self.parameters["weights"][layer], dA_prev)
        dW = np.dot(cache["d_activation"][layer], dA_prev.T)
        db = np.sum(dA_prev, axis=1, keepdims=True)
        return dA, dW, db

    def activationBackward(self, dA, cache, activation="relu", layer=None):
        """
        Interface to call backward on activation functions.
        In this case, it's just relu.
        :param dA: gradient on the output of the activation (modified in place)
        :param cache: cache returned by forwardPropagation
        :param activation: ignored; only ReLU is implemented
        :param layer: index into cache["r_activation"]; defaults to the
            last affine layer
        :returns: gradient on the input of the activation
        """
        if layer is None:
            layer = len(self.parameters["weights"]) - 1
        return self.relu_derivative(dA, cache["r_activation"][layer])

    def relu_derivative(self, dx, cached_x):
        """
        Zero the gradient wherever the cached ReLU output is <= 0.
        NOTE: dx is modified in place and also returned.
        """
        dx[cached_x <= 0] = 0
        return dx

    def dropout_backward(self, dA, cache, layer=None):
        """
        Apply the dropout mask stored for the input of affine layer ``layer``.
        :param dA: gradient on the (dropped-out) input of the affine layer
        :param cache: cache returned by forwardPropagation
        :param layer: index of the affine layer; defaults to the last one
        :returns: masked gradient; dA is returned unchanged when there is no
            mask for that layer (network input, or a cache produced outside
            training mode)
        """
        if layer is None:
            layer = len(self.parameters["weights"]) - 1
        if layer < 1:
            return dA
        mask = cache["dm"][layer - 1]
        if mask is None:
            return dA
        return dA * mask

    def backPropagation(self, dAL, Y, cache):
        """
        Run backpropagation to compute gradients on all parameters in the model
        :param dAL: gradient on the last layer of the network. Returned by the cost function.
        :param Y: labels (unused; kept for interface compatibility)
        :param cache: cached values during forwardprop
        :returns gradients: dict with "dweights" and "dbiases", one entry
            per affine layer
        """
        weights = self.parameters["weights"]
        n_affine = len(weights)
        gradients = {"dweights": [None] * n_affine, "dbiases": [None] * n_affine}

        upstream = dAL
        # Walk from the output layer to the first layer with a local index
        # instead of overwriting self.num_layers.
        for layer in range(n_affine - 1, -1, -1):
            dA, dW, db = self.affineBackward(upstream, cache, layer)
            if self.reg_lambda > 0:
                # add gradients from L2 regularization to each dW
                dW += self.reg_lambda * weights[layer]
            gradients["dweights"][layer] = dW
            gradients["dbiases"][layer] = db

            if layer == 0:
                # The gradient with respect to the raw input is not needed.
                break
            if self.drop_prob > 0:
                dA = self.dropout_backward(dA, cache, layer)
            upstream = self.activationBackward(dA, cache, layer=layer)

        return gradients

    def updateParameters(self, gradients, alpha):
        """
        One step of plain gradient descent, applied in place.
        :param gradients: gradients for each weight/bias
        :param alpha: step size for gradient descent
        """
        for i in range(len(self.parameters["weights"])):
            self.parameters["weights"][i] -= alpha * gradients["dweights"][i]
            self.parameters["biases"][i] -= alpha * gradients["dbiases"][i]

    def _accuracy(self, X, y):
        """Fraction of columns of X predicted as y; nan when y is empty."""
        if len(y) == 0:
            return float("nan")
        return np.mean(self.predict(X) == y)

    def train(self, X, y, iters, alpha, batch_size, print_every, train_size=45001):
        """
        :param X: input samples, each column is a sample
        :param y: labels for input samples, y.shape[0] must equal X.shape[1]
        :param iters: number of training iterations (iters + 1 minibatch
            steps are executed, numbered 0..iters)
        :param alpha: step size for gradient descent
        :param batch_size: number of samples in a minibatch
        :param print_every: no. of iterations to print debug info after;
            0 disables printing
        :param train_size: the first train_size samples are used for
            training, the remainder for validation
        """
        X_tr, y_tr = X[:, :train_size], y[:train_size]
        X_val, y_val = X[:, train_size:], y[train_size:]

        for i in range(iters + 1):
            # get minibatch
            X_t, y_t = self.get_batch(X_tr, y_tr, batch_size)
            # forward prop with dropout enabled; always switch back to
            # evaluation mode, even if the forward pass raises
            self.training_mode = 1
            try:
                AL, cache = self.forwardPropagation(X_t)
            finally:
                self.training_mode = 0
            # compute loss
            cost, dAL = self.costFunction(AL, y_t)
            # compute gradients
            gradients = self.backPropagation(dAL, y_t, cache)
            # update weights and biases based on gradient
            self.updateParameters(gradients, alpha)
            if print_every and i % print_every == 0:
                # cost is the minibatch loss before this step's update;
                # accuracies are measured after it
                print("cost:%.2f, train accuracy:%.2f, validation accuracy:%.2f" % (
                    cost, self._accuracy(X_tr, y_tr), self._accuracy(X_val, y_val)))

    def predict(self, X):
        """
        Make predictions for each sample
        :param X: input samples, each column is a sample
        :returns: index of the highest-scoring class for every column
        """
        AL, _ = self.forwardPropagation(X)
        return np.argmax(AL, axis=0)

    def get_batch(self, X, y, batch_size):
        """
        Return minibatch of samples and labels

        :param X, y: samples (one per column) and corresponding labels
        :param batch_size: minibatch size; must not exceed X.shape[1]
        :returns: (tuple) X_batch, y_batch
        """
        # Sample from the columns that were actually passed in rather than
        # from a hard-coded dataset size.
        picked = np.random.choice(X.shape[1], batch_size, replace=False)
        return X[:, picked], y[picked]

In [3]:
# Helper functions, DO NOT modify this

def get_img_array(path):
    """
    Read the image file at ``path`` and return its contents as a numpy array.
    """
    pixels = scipy.misc.imread(path)
    return pixels

def get_files(folder):
    """
    Return the sorted list of paths matching the glob pattern ``folder + '*/*'``.
    """
    return sorted(glob.glob(folder + '*/*'))

def get_label(filepath, label2id):
    """
    Map a file path of the form /path/to/file/999_frog.png to its label id.

    The label text is whatever follows the first underscore of the file
    name, minus the last four characters (the extension). The process
    exits with an error message when that text is not a key of label2id.
    """
    file_name = filepath.split('/')[-1]
    label = file_name.split('_')[1][:-4]
    if label not in label2id:
        sys.exit("Invalid label: " + label)
    return label2id[label]

In [4]:
# Functions to load data, DO NOT change these

def get_labels(folder, label2id):
    """
    Build the label vector for every file found under ``folder``.
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    :returns: numpy array with one label id per file, in get_files() order
    """
    return np.array([get_label(path, label2id) for path in get_files(folder)])

def one_hot(y, num_classes=10):
    """
    One-hot encode the label indices in y as rows of a
    (len(y), num_classes) float matrix.
    """
    num_samples = y.shape[0]
    encoded = np.zeros((num_samples, num_classes))
    encoded[np.arange(num_samples), y] = 1
    return encoded

def get_label_mapping(label_file):
    """
    Read a file holding one label per line and return both lookups:
    id2label (list, position is the id) and label2id (dict, label -> id).
    """
    with open(label_file, 'r') as f:
        id2label = [line.strip() for line in f.readlines()]
    label2id = {label: index for index, label in enumerate(id2label)}
    return id2label, label2id

def get_images(folder):
    """
    Load every file returned by get_files(folder) into one numpy array.
    Each column is one image, flattened and divided by 255.0.
    Progress is printed after every 10000 files.
    """
    files = get_files(folder)
    total = len(files)
    columns = []
    for count, path in enumerate(files, start=1):
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count, total))
        columns.append(get_img_array(path).flatten() / 255.0)
    return np.column_stack(columns)

def get_train_data(data_root_path):
    """
    Load the training set found under ``data_root_path``.

    Reads the label list from data_root_path + 'labels.txt', prints the
    label -> id mapping, and returns (X, y) built from the files under
    data_root_path + 'train'.
    """
    train_data_path = data_root_path + 'train'
    _, label2id = get_label_mapping(data_root_path + 'labels.txt')
    print(label2id)
    samples = get_images(train_data_path)
    labels = get_labels(train_data_path, label2id)
    return samples, labels

def save_predictions(filename, y):
    """
    Write the array y to ``filename`` in numpy's .npy format.
    """
    np.save(file=filename, arr=y)

In [5]:
# Load the data
# NOTE: machine-specific absolute path; the trailing slash matters because
# the loaders build paths by plain string concatenation.
data_root_path = '/home/animesh/deeplearning/cifar10-hw1/'
X_train, y_train = get_train_data(data_root_path) # this may take a few minutes
# Only the image matrix is loaded for the test set; no labels are read here.
X_test = get_images(data_root_path + 'test')
print('Data loading done')

{'ship': 8, 'automobile': 1, 'horse': 7, 'frog': 6, 'deer': 4, 'cat': 3, 'airplane': 0, 'bird': 2, 'dog': 5, 'truck': 9}
Loaded 10000/50000
Loaded 20000/50000
Loaded 30000/50000
Loaded 40000/50000
Loaded 50000/50000
Loaded 10000/10000
Data loading done


## Part 1

#### Simple fully-connected deep neural network

In [7]:
# The input width is taken from the data (rows of X_train); the last entry is the output width.
layer_dimensions = [X_train.shape[0], 200, 100, 50, 10]  # including the input and output layers
# Default drop_prob=0.0 and reg_lambda=0.0: no dropout, no L2 penalty.
NN = NeuralNetwork(layer_dimensions)
# train() loops over range(0, iters+1), so progress is printed at iterations 0, 1000, ..., 14000.
NN.train(X_train, y_train, iters=14000, alpha=0.1, batch_size=500, print_every=1000)

cost:2.30, train accuracy:0.10, validation accuracy:0.10
cost:2.30, train accuracy:0.10, validation accuracy:0.09
cost:2.00, train accuracy:0.23, validation accuracy:0.23
cost:1.85, train accuracy:0.31, validation accuracy:0.29
cost:1.66, train accuracy:0.40, validation accuracy:0.39
cost:1.53, train accuracy:0.44, validation accuracy:0.43
cost:1.51, train accuracy:0.42, validation accuracy:0.39
cost:1.34, train accuracy:0.52, validation accuracy:0.48
cost:1.25, train accuracy:0.55, validation accuracy:0.50
cost:1.20, train accuracy:0.57, validation accuracy:0.50
cost:1.09, train accuracy:0.60, validation accuracy:0.53
cost:1.10, train accuracy:0.59, validation accuracy:0.50
cost:1.03, train accuracy:0.59, validation accuracy:0.49
cost:0.95, train accuracy:0.65, validation accuracy:0.52
cost:0.88, train accuracy:0.67, validation accuracy:0.52


In [8]:
# Predict a class index for every test image with the Part 1 network.
y_predicted = NN.predict(X_test)
# np.save appends the .npy extension to the file name.
save_predictions('ans1-aas2325', y_predicted)

In [9]:
# test if your numpy file has been saved correctly
# Reload the saved predictions and show their shape and the first ten entries.
loaded_y = np.load('ans1-aas2325.npy')
print(loaded_y.shape)
loaded_y[:10]

(10000,)


array([3, 1, 0, 3, 5, 1, 3, 4, 8, 1])

## Part 2: Regularizing the neural network
#### Add dropout and L2 regularization

In [10]:
# NOTE: NeuralNetwork.dropout keeps a unit when rand < prob and rescales by 1/prob,
# so drop_prob=0.9 here acts as a keep probability (about 90% of activations are kept).
# reg_lambda is the L2 weight-penalty strength.
NN2 = NeuralNetwork(layer_dimensions, drop_prob=0.9, reg_lambda=0.0001)
# Same training schedule as Part 1 so that the two runs can be compared.
NN2.train(X_train, y_train, iters=14000, alpha=0.1, batch_size=500, print_every=1000)

cost:2.31, train accuracy:0.10, validation accuracy:0.10
cost:2.31, train accuracy:0.10, validation accuracy:0.10
cost:2.07, train accuracy:0.25, validation accuracy:0.25
cost:1.94, train accuracy:0.27, validation accuracy:0.26
cost:1.69, train accuracy:0.39, validation accuracy:0.38
cost:1.59, train accuracy:0.45, validation accuracy:0.44
cost:1.58, train accuracy:0.44, validation accuracy:0.43
cost:1.45, train accuracy:0.46, validation accuracy:0.44
cost:1.36, train accuracy:0.53, validation accuracy:0.49
cost:1.31, train accuracy:0.55, validation accuracy:0.50
cost:1.29, train accuracy:0.58, validation accuracy:0.52
cost:1.24, train accuracy:0.58, validation accuracy:0.51
cost:1.25, train accuracy:0.61, validation accuracy:0.52
cost:1.09, train accuracy:0.62, validation accuracy:0.52
cost:1.03, train accuracy:0.65, validation accuracy:0.54


In [12]:
# Predict with the regularized network; predict() runs with training_mode == 0, so dropout is not applied.
y_predicted2 = NN2.predict(X_test)
# np.save appends the .npy extension to the file name.
save_predictions('ans2-aas2325', y_predicted2)

In [13]:
# test if your numpy file has been saved correctly
# Reload the Part 2 predictions and show their shape and the first ten entries.
loaded_y = np.load('ans2-aas2325.npy')
print(loaded_y.shape)
loaded_y[:10]

(10000,)


array([3, 8, 0, 5, 5, 1, 8, 7, 8, 1])