# COMS 4995_002 Deep Learning Assignment 1
Due on Monday, Oct 9, 11:59pm

This assignment can be done in groups of at most 3 students. Everyone must submit on Courseworks individually.

Write down the UNIs of your group (if applicable)

Member 1: Mike Alvarino, maa2282

Member 2: Name, UNI

Member 3: Name, UNI

In [31]:
if __name__ == '__main__':
    %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import glob
import sys
# you shouldn't need to make any more imports

In [38]:
class NeuralNetwork(object):
    """
    Abstraction of neural network.
    Stores parameters, activations, cached values. 
    Provides necessary functions for training and prediction. 
    """
    def __init__(self, layer_dimensions, drop_prob=0.0, reg_lambda=0.0):
        """
        Initializes the weights and biases for each layer
        :param layer_dimensions: (list) number of nodes in each layer
        :param drop_prob: drop probability for dropout layers. Only required in part 2 of the assignment
        :param reg_lambda: regularization parameter. Only required in part 2 of the assignment
        """
        np.random.seed(1)
        
        self.parameters = {}
        self.num_layers = len(layer_dimensions) - 1
        self.drop_prob = drop_prob
        self.reg_lambda = reg_lambda
        self._batch_size = 0
        self._batch_num = 0
        
        # init parameters
        for i in range(1, self.num_layers + 1):
            # initialize each parameter to ([random_array], 1)* 0.001
            # TODO: wtf? why do i need to scale the weights down by 0.001?
            self.parameters[i] = {'w': np.random.random(size=(layer_dimensions[i], layer_dimensions[i - 1]))*1e-3,
                                 'b': np.ones(layer_dimensions[i])}
#             print("self.parameters[{}]['w'].shape".format(i), self.parameters[i]['w'].shape)
#             print("self.parameters[{}]['b'].shape".format(i), self.parameters[i]['b'].shape)
            
    def affineForward(self, A, W, b):
        """
        Forward pass for the affine layer.
        :param A: input matrix, shape (L, S), where L is the number of hidden units in the previous layer and S is
        the number of samples
        :returns: the affine product WA + b, along with the cache required for the backward pass
        """
        Z = np.dot(W, A) #+ b
        return Z

    def activationForward(self, A, activation="relu"):
        """
        Common interface to access all activation functions.
        :param A: input to the activation function
        :param prob: activation function to apply to A. Just "relu" for this assignment.
        :returns: activation(A)
        """ 
        if activation == "relu":
            return self.relu(A)
        elif activation == "softmax":
            return self.softmax(A)

    def relu(self, X):
        return np.maximum(0, X)

    def softmax(self, X):
        exponentiated = np.exp(X)
        sums = np.apply_along_axis(np.sum, 0, exponentiated)
        return np.array([e/summation for summation, e in zip(sums, exponentiated.T)])
            
    def dropout(self, A, prob):
        """
        :param A: 
        :param prob: drop prob
        :returns: tuple (A, M) 
            WHERE
            A is matrix after applying dropout
            M is dropout mask, used in the backward pass
        """
        
        M = np.random.choice([0, 1], size=A.shape, p=[prob, 1-prob])
        a = np.multiply(A, M)

        return a, M

    def forwardPropagation(self, X, training=False):
        """
        Runs an input X through the neural network to compute activations
        for all layers. Returns the output computed at the last layer along
        with the cache required for backpropagation.
        :returns: (tuple) AL, cache
            WHERE 
            AL is activation of last layer
            cache is cached values for each layer that
                     are needed in further steps
        """
        #drop_prob = .50
        
        if training:
            m2 = np.random.binomial(1, self.drop_prob)
        else:
            m2 = self.drop_prob
        #y2 *= m2
        
        # parameters is indexed at 1
        cache = {}
        A = X
        for layer in self.parameters:
#             print("A{}.shape".format(layer), A.shape)
            W = self.parameters[layer]['w']
            b = self.parameters[layer]['b']
            affine = self.affineForward(A, W, b)
            if layer == self.num_layers:
                cache[layer] = {"A": A, "w": W, "b": b}
                A = affine
                continue
            else:
                activated = self.activationForward(affine, activation="relu") * m2
            cache[layer] = {"z": affine, "g": activated, "w": W, "A": A, "b": b}
            A = affine
        return A, cache
    
    def costFunction(self, AL, y):
        """
        :param AL: Activation of last layer, shape (num_classes, S) (activation of last layer is softmax(Z_l))
        :param y: labels, shape (S)
        :param alpha: regularization parameter
        :returns cost, dAL: A scalar denoting cost and the 
                    gradient of cost (derivative of loss wrt Al https://piazza.com/class/j5y4qz1uu1f1hf?cid=25)
        """
        AL = self.softmax(AL).T
#         print("AL_softmax.shape", AL.shape)
#         print("y.shape", y.shape)
        y_hat = np.choose(y, AL)
        
#         print("correct_label_prob.shape", y_hat.shape)
        neg_log_probs = -np.log(y_hat)
        
        cost = np.sum(neg_log_probs)/self._batch_size
        
        if self.reg_lambda > 0:
            norms = []
            for l in self.parameters:
                norms.append(np.linalg.norm(self.parameters[l]['w']))
            sum_of_norms = np.sum(np.array(norms) **2)
            cost += self.reg_lambda * sum_of_norms / (2.0 * self._batch_size)
        
#         print("cost.shape", cost.shape)
        #if self.reg_lambda > 0:
            # add regularization
        #    pass
        # derivative of the cost wrt z
        
        dAL = np.copy(AL)
        dAL[y, range(self._batch_size)] = dAL[y, range(self._batch_size)] - 1
        
#         print("dAL.shape", dAL.shape)
        return cost, dAL

    def deriv_softmax(self, softmax):
        return softmax - 1
    
    def affineBackward(self, dA_prev, cache):
        """
        Backward pass for the affine layer.
        :param dA_prev: gradient from the next layer.
        :param cache: cache returned in affineForward
        :returns dA: gradient on the input to this layer
                 dW: gradient on the weights
                 db: gradient on the bias
        """
        
        dW = np.dot(dA_prev, cache['A'].T) + self.reg_lambda * cache['w']
        db = dA_prev
        dA = np.dot(cache['w'].T, dA_prev)
        
        return dA, dW, db

    def activationBackward(self, dA, cache, activation="relu"):
        """
        Interface to call backward on activation functions.
        In this case, it's just relu. 
        """
        
        return self.relu_derivative(dA, cache['z'])
        
        
    def relu_derivative(self, dx, cache):
        """ 
        dx - preceding derivative in chain rule
        cache - affine output of layer 
        """
        return np.multiply(dx, 1. * (cache > 0))

    def dropout_backward(self, dA, cache):

        return dA
    
    def backPropagation(self, dAL, Y, cache):
        """
        Run backpropagation to compute gradients on all paramters in the model
        :param dAL: gradient on the last layer of the network. Returned by the cost function.
        :param Y: labels, not needed
        :param cache: cached values during forwardprop
        :returns gradients: dW and db for each weight/bias
        """
        
        gradients = {}
        # derivative of Loss wrt any activation layer
        for layer_idx in range(self.num_layers, 0, -1):
#             print("=========" + str(layer_idx) + "==========")
            
            if not self.num_layers == layer_idx:
                dA_prev = self.activationBackward(dAL, cache[layer_idx])
            else:
                dA_prev = dAL
            
            dA, dW, db = self.affineBackward(dA_prev, cache[layer_idx])
            
            gradients[layer_idx] = dW
            
            dAL = dA
            
        return gradients

    def updateParameters(self, gradients, alpha):
        """
        :param gradients: gradients for each weight/bias
        :param alpha: step size for gradient descent 
        """
        for param_idx in self.parameters:
            self.parameters[param_idx]['w'] = self.parameters[param_idx]['w'] - alpha * gradients[param_idx]

    def train(self, X, y, iters=1000, alpha=0.0001, batch_size=100, print_every=100):
        """
        :param X: input samples, each column is a sample
        :param y: labels for input samples, y.shape[0] must equal X.shape[1]
        :param iters: number of training iterations
        :param alpha: step size for gradient descent
        :param batch_size: number of samples in a minibatch
        :param print_every: no. of iterations to print debug info after
        """
        
        train_interval = range(X.shape[1] * 9 // 10)
        test_interval = range(X.shape[1] * 9 //10, X.shape[1])
        X_train, y_train = X[:, train_interval], y[train_interval]
        self.X_test, self.y_test = X[:, test_interval], y[test_interval]
        
        self._batch_size = batch_size
        for i in range(0, iters):
            # get minibatch
            X_batch, y_batch = self.get_batch(X, y)
            
            # forward prop
            AL, cache = self.forwardPropagation(X_batch, training=True)
            
            # compute loss
            cost, dAL = self.costFunction(AL, y_batch)
            
            # compute gradients
            gradients = self.backPropagation(dAL, y_batch, cache)

            # update weights and biases based on gradient
            self.updateParameters(gradients, alpha)

            if i % print_every == 0:
                # print cost, train and validation set accuracies
                print(cost)
                
    def predict(self, X):
        """
        Make predictions for each sample
        """
        # forward propagation
        AL, cache = self.forwardPropagation(X)
        #print(AL)
        
        # compute the output layer
        probs = self.softmax(AL).T
        print(probs.shape)
        
        # Take the max of probs to obtain proper index
        probs_index = np.argmax(probs, axis=0)
        print(probs_index)
        
        return probs_index
    
    def accuracy(self):
        """
        This will calculate the accuracy of the Network's Predictions
        """
        # Calculate where the indices of x and y match, sum and divide by total numbers
        y_pred = self.predict(self.X_test)
        #print(y_pred)
        
        a = np.sum(np.equal(y_pred, self.y_test))/float(5000)
        print(float(a))
        return a
        

    def get_batch(self, X, y):
        """
        Return minibatch of samples and labels
        
        :param X, y: samples and corresponding labels
        :parma batch_size: minibatch size
        :returns: (tuple) X_batch, y_batch
        """
        indices = np.random.permutation(range(self._batch_size)) + np.random.randint(X.shape[1] - self._batch_size)
        X_batch = X[:, indices]
        y_batch = y[indices]
        return X_batch, y_batch
        

In [39]:
# Helper functions, DO NOT modify this

def get_img_array(path):
    """
    Given path of image, returns it's numpy array
    """
    return scipy.misc.imread(path)

def get_files(folder):
    """
    Given path to folder, returns list of files in it
    """
    filenames = [file for file in glob.glob(folder+'*/*')]
    filenames.sort()
    return filenames

def get_label(filepath, label2id):
    """
    Files are assumed to be labeled as: /path/to/file/999_frog.png
    Returns label for a filepath
    """
    tokens = filepath.split('/')
    label = tokens[-1].split('_')[1][:-4]
    if label in label2id:
        return label2id[label]
    else:
        sys.exit("Invalid label: " + label)

In [40]:
# Functions to load data, DO NOT change these

def get_labels(folder, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    files = get_files(folder)
    y = []
    for f in files:
        y.append(get_label(f,label2id))
    return np.array(y)

def one_hot(y, num_classes=10):
    """
    Converts each label index in y to vector with one_hot encoding
    """
    y_one_hot = np.zeros((y.shape[0], num_classes))
    y_one_hot[y] = 1
    return y_one_hot.T

def get_label_mapping(label_file):
    """
    Returns mappings of label to index and index to label
    The input file has list of labels, each on a separate line.
    """
    with open(label_file, 'r') as f:
        id2label = f.readlines()
        id2label = [l.strip() for l in id2label]
    label2id = {}
    count = 0
    for label in id2label:
        label2id[label] = count
        count += 1
    return id2label, label2id

def get_images(folder):
    """
    returns numpy array of all samples in folder
    each column is a sample resized to 30x30 and flattened
    """
    files = get_files(folder)
    images = []
    count = 0
    
    for f in files:
        count += 1
        if count % 10000 == 0:
            print("Loaded {}/{}".format(count,len(files)))
        img_arr = get_img_array(f)
        img_arr = img_arr.flatten() / 255.0
        images.append(img_arr)
    X = np.column_stack(images)

    return X

def get_train_data(data_root_path):
    """
    Return X and y
    """
    train_data_path = data_root_path + 'train'
    id2label, label2id = get_label_mapping(data_root_path+'labels.txt')
    print(label2id)
    X = get_images(train_data_path)
    y = get_labels(train_data_path, label2id)
    return X, y

def save_predictions(filename, y):
    """
    Dumps y into .npy file
    """
    np.save(filename, y)

In [35]:
# Load the data
if __name__ == '__main__':
    data_root_path = './cifar10-hw1/'
    X_train, y_train = get_train_data(data_root_path) # this may take a few minutes
    X_test = get_images(data_root_path + 'test')
    print('Data loading done')

{'horse': 7, 'automobile': 1, 'deer': 4, 'dog': 5, 'frog': 6, 'cat': 3, 'truck': 9, 'ship': 8, 'airplane': 0, 'bird': 2}
Loaded 10000/50000
Loaded 20000/50000
Loaded 30000/50000
Loaded 40000/50000
Loaded 50000/50000
Loaded 10000/10000
Data loading done


## Part 1

#### Simple fully-connected deep neural network

In [41]:
if __name__ == '__main__':
    layer_dimensions = [X_train.shape[0], 40, 10]  # including the input and output layers
    NN = NeuralNetwork(layer_dimensions, reg_lambda=0.0)
    NN.train(X_train, y_train, iters=1500, alpha=.00075, batch_size=75, print_every=100)

2.3026303624
2.17388990916
2.05853453829
1.98883481627
2.10241431409
1.87238511745
2.16056396794
1.97735168026
1.93148080444
1.8259013045
1.95931190802
1.89265810948
1.70708562197
1.83499803598
1.83353547589


In [42]:
y_predicted = NN.predict(X_test)
#print(y_predicted)
save_predictions('ans1-rld2126', y_predicted)


(10, 10000)
[3 9 8 ..., 5 6 7]


In [43]:
accuracy = NN.accuracy()
print(float(accuracy))

(10, 5000)
[5 4 8 ..., 6 6 9]
0.3452
0.3452


In [44]:
# test if your numpy file has been saved correctly
loaded_y = np.load('ans1-rld2126.npy')
print(loaded_y.shape)
loaded_y[:10]

(10000,)


array([3, 9, 8, 4, 5, 8, 9, 7, 8, 1])

## Part 2: Regularizing the neural network
#### Add dropout and L2 regularization

In [45]:
if __name__ == '__main__':
    NN2 = NeuralNetwork(layer_dimensions, drop_prob=.50, reg_lambda=.20)
    NN2.train(X_train, y_train, iters=1500, alpha=0.00075, batch_size=75, print_every=100)

2.30268520075
2.17533015788
2.05834844868
1.99536622217
2.10039591773
1.88886932282
2.15854880801
1.97485033559
1.94380264962
1.84915606608
1.97126442295
1.90585466893
1.70957719055
1.8545257712
1.8452305532


In [47]:
if __name__ == '__main__':
    y_predicted2 = NN2.predict(X_test)
    save_predictions(y_predicted, 'ans2-rld2126')

(10, 10000)
[3 8 8 ..., 5 3 7]


AttributeError: 'numpy.ndarray' object has no attribute 'write'