In [1]:
import numpy as np
import pickle

In [12]:
config = {}

config['layer_specs'] = [784, 50, 10]  # The length of list denotes number of hidden layers; each element denotes number of neurons in that layer; first element is the size of input layer, last element is the size of output layer.
config['activation'] = 'sigmoid' # Takes values 'sigmoid', 'tanh' or 'ReLU'; denotes activation function for hidden layers
config['batch_size'] = 1000  # Number of training samples per batch to be passed to network
config['epochs'] = 10  # Number of epochs to train the model
config['early_stop'] = True  # Implement early stopping or not
config['early_stop_epoch'] = 5  # Number of epochs for which validation loss increases to be counted as overfitting
config['L2_penalty'] = 0  # Regularization constant
config['momentum'] = False  # Denotes if momentum is to be applied or not
config['momentum_gamma'] = 0.9  # Denotes the constant 'gamma' in momentum expression
config['learning_rate'] = 0.1 # Learning rate of gradient descent algorithm


In [3]:
def softmax(x):
    return np.exp(x - max(x)) / float(sum(np.exp(x - max(x))))

In [4]:
def load_data(fname):
    data = pickle.load(open(fname, "rb"))
    
    images = data[:, :784]
    labels = data[:, 784]
    
    one_hot = np.zeros((labels.shape[0], 10))
    
    # Encode labels (0-9) to a one-hot encoding
    for i in range(one_hot.shape[0]):
        one_hot[i][int(labels[i])] = 1
        
    labels = one_hot
    
    return images, labels

In [16]:
class Activation:
    def __init__(self, activation_type = "sigmoid"):
        self.activation_type = activation_type
        self.x = None # Save the input 'x' for sigmoid or tanh or ReLU to this variable since it will be used later for computing gradients.
        
    def update(self, config):
        pass
    
    def forward_pass(self, a):
        if self.activation_type == "sigmoid":
            return self.sigmoid(a)
    
        elif self.activation_type == "tanh":
            return self.tanh(a)
    
        elif self.activation_type == "ReLU":
            return self.ReLU(a)
        
    def backward_pass(self, delta):
        if self.activation_type == "sigmoid":
            grad = self.grad_sigmoid()
    
        elif self.activation_type == "tanh":
            grad = self.grad_tanh()
    
        elif self.activation_type == "ReLU":
            grad = self.grad_ReLU()
    
        return grad * delta
      
    def sigmoid(self, x):
        self.x = x
        
        # prevent overflow
        self.x = np.clip(self.x, -100, 100)

        return 1.0 / (1.0 + np.exp(-x)) 

    def tanh(self, x):
        self.x = x
        return np.tanh(x)

    def ReLU(self, x):
        """
        Write the code for ReLU activation function that takes in a numpy array and returns a numpy array.
        """
        self.x = x
        return x * (x > 0)

    def grad_sigmoid(self):
        return self.sigmoid(self.x) * (1 - self.sigmoid(self.x))

    def grad_tanh(self):
        return 1 - np.power(self.tanh(self.x), 2)

    def grad_ReLU(self):
        """
        Write the code for gradient through ReLU activation function that takes in a numpy array and returns a numpy array.
        """
        return self.x > 0


class Layer():
    def __init__(self, in_units, out_units):
        np.random.seed(42)
        self.w = np.random.randn(in_units, out_units)#ones((in_units, out_units))         # Weight matrix
        self.b = np.zeros((1, out_units)).astype(np.float32)  # Bias
        self.x = None    # Save the input to forward_pass in this
        self.a = None    # Save the output of forward pass in this (without activation)
        self.d_x = None  # Save the gradient w.r.t x in this
        self.d_w = None  # Save the gradient w.r.t w in this
        self.d_b = None  # Save the gradient w.r.t b in this
        
    def update(self, config):
        self.w += config['learning_rate'] * self.d_w
        self.b += config['learning_rate'] * self.d_b

    def forward_pass(self, x):
        self.x = x
        self.a = np.matmul(self.w.T, self.x)
        self.a += self.b[0,:]
        
        return self.a
  
    def backward_pass(self, delta):
        """
        Write the code for backward pass. This takes in gradient from its next layer as input,
        computes gradient for its weights and the delta to pass to its previous layers.
        """     
        self.d_w = delta * np.array([self.x]).T
        #self.d_w = np.matmul(delta.T, [self.x.T]).T
        self.d_x = np.matmul(delta, self.w.T)
        self.d_b = delta
        
        #self.w += self.d_w
        #self.b += self.d_b
                
        return self.d_x

      
class Neuralnetwork():
    def __init__(self, config):
        self.layers = []
        self.x = None        # Save the input to forward_pass in this
        self.y = None        # Save the output vector of model in this
        self.targets = None  # Save the targets in forward_pass in this variable
        
        for i in range(len(config['layer_specs']) - 1):
            self.layers.append( Layer(config['layer_specs'][i], config['layer_specs'][i+1]) )
            
            if i < len(config['layer_specs']) - 2:
                self.layers.append(Activation(config['activation']))  
    
    def loss_func(self, logits, targets):
        total = 0         # total accumulated loss
        e = 0.0000001       # epsilon for stability; prevent log(0)
    
        temp = np.log(logits + e)
        temp = targets * temp
    
        return -(np.sum(temp))
    
    def update(self, config):
        for layer in self.layers:
            layer.update(config)
    
    
    def forward_pass(self, x, targets=None):
        """
        Write the code for forward pass through all layers of the model and return loss and predictions.
        If targets == None, loss should be None. If not, then return the loss computed.
        """
        x = x[0]
        
        self.x = x
        self.targets = targets
        
        # Propagate the data
        for layer in self.layers:
            x = layer.forward_pass(x)
        self.y = softmax(x)
        
        # Calculate the loss
        if self.targets is None:
            loss = None
        else:
            loss = self.loss_func(self.y, self.targets)
        
        return loss, self.y

    def backward_pass(self):
        '''
        implement the backward pass for the whole network. 
        hint - use previously built functions.
        '''
        delta = np.array([self.targets - self.y])
        
        for layer in reversed(self.layers):
            delta = layer.backward_pass(delta)

In [23]:
def get_random_samples(x, y, percentage):
    indices = np.random.permutation(len(x))[:(len(x) * percentage / 100)]
    
    randomX = [x[i] for i in indices]
    randomY = [y[i] for i in indices]
    
    randomX = np.array(randomX)
    randomY = np.array(randomY)
    
    return randomX, randomY

def accuracy(predicts, actuals):
    predicts = np.round_(predicts)
    correct = np.equal(actuals, predicts)
    
    return np.sum(correct) * 1.0 / correct.size

def trainer(model, X_train, y_train, X_valid, y_valid, config):
    """
    Write the code to train the network. Use values from config to set parameters
    such as L2 penalty, number of epochs, momentum, etc.
    """
    
    for i in range(config['epochs']):
        train_predictions = []
        train_loss = 0
        
        valid_predictions = []
        valid_loss = 0
        
        for x, y in zip(X_train, y_train):
            l, p = model.forward_pass([x], y)
            model.backward_pass()
            model.update(config)
            
            train_predictions.append(p)
            train_loss += l
            
        for x, y in zip(X_valid, y_valid):
            l, p = model.forward_pass([x], y)
            
            valid_predictions.append(p)
            valid_loss += l
            
        train_accuracy = accuracy(train_predictions, y_train)
        valid_accuracy = accuracy(valid_predictions, y_valid)
            
        print("Epoch " + str(i) + ": Acc = " + str(acc) + ", Loss = " + str(loss))

def test(model, X_test, y_test, config):
    """
    Write code to run the model on the data passed as input and return accuracy.
    """
    predictions = []
    loss = 0
    
    for x, y in zip(X_test, y_test):
        l, p = model.forward_pass([x], y)
        
        predictions.append(p)
        loss += l
    
    return accuracy(predictions, y_test), loss

In [22]:
if __name__ == "__main__":
    train_data_fname = 'MNIST_train.pkl'
    valid_data_fname = 'MNIST_valid.pkl'
    test_data_fname = 'MNIST_test.pkl'

    ### Train the network ###
    model = Neuralnetwork(config)
    X_train, y_train = load_data(train_data_fname)
    X_valid, y_valid = load_data(valid_data_fname)
    X_test, y_test = load_data(test_data_fname)
    
    print("*** Training model ***")
    trainer(model, X_train, y_train, X_valid, y_valid, config)
    
    print("*** Testing model ***")
    accuracy, loss = test(model, X_test, y_test, config)
    print("Test: Acc = " + str(accuracy) + ", Loss = " + str(loss))
    
    

*** Training model ***
Epoch 0: Acc = 0.926278, Loss = 65920.51843174506
Epoch 1: Acc = 0.946872, Loss = 48508.02340497098
Epoch 2: Acc = 0.95042, Loss = 46326.970664592794
Epoch 3: Acc = 0.951454, Loss = 44938.75881572363
Epoch 4: Acc = 0.954944, Loss = 41685.1194568158
Epoch 5: Acc = 0.960612, Loss = 37148.083658303694
Epoch 6: Acc = 0.965668, Loss = 32522.07474646748
Epoch 7: Acc = 0.967826, Loss = 30226.077227439295
Epoch 8: Acc = 0.968086, Loss = 29718.152178649925
Epoch 9: Acc = 0.967356, Loss = 30060.77439137463
*** Testing model ***
Test: Acc = 0.97043, Loss = 0.11706322586136919


In [8]:
if __name__ == "__main__":
    train_data_fname = 'MNIST_train.pkl'
    valid_data_fname = 'MNIST_valid.pkl'
    test_data_fname = 'MNIST_test.pkl'

    ### Train the network ###
    model = Neuralnetwork(config)
    X_train, y_train = load_data(train_data_fname)
    X_valid, y_valid = load_data(valid_data_fname)
    X_test, y_test = load_data(test_data_fname)
    # trainer(model, X_train, y_train, X_valid, y_valid, config)
    # test_acc = test(model, X_test, y_test, config)
      
    ### Test on a subset of data ###
    eps = 0.001   # epsilon used to approximate gradient
    
    out_b = 3     # output bias weight 
    hid_b = 82    # hidden bias weight
    
    # 2 hidden to output weights
    hid_w1 = (62, 4)
    hid_w2 = (36, 7)
    
    # 2 input to hidden weights
    in_w1 = (539, 21)
    in_w2 = (420, 99)
    
    toy_model = Neuralnetwork(config)
    
    toy_xtrain = X_train
    toy_ytrain = y_train[0]
  
    '''
    og_weights = toy_model.layers[4].b[0, out_b]
    
    toy_model.layers[4].b[0, out_b] = eps
    loss1 = toy_model.forward_pass(toy_xtrain, toy_ytrain)[0]
    print (loss1)
    toy_model.layers[4].b[0, out_b] = (-eps)
    loss2 = toy_model.forward_pass(toy_xtrain, toy_ytrain)[0]
    print (loss2)
    
    est_grad = (loss1 - loss2) / (2.0 * eps)
    
    toy_model.layers[4].b[0, out_b] = og_weights
    
    toy_model.forward_pass(toy_xtrain, toy_ytrain)[0]
    toy_model.backward_pass()
    
    calc_grad = toy_model.layers[4].d_b[0, out_b]
    
    print (calc_grad)
    print (est_grad)
    '''
    
    hid_b = 7    # hidden bias weight
    og_weights = toy_model.layers[2].b[0, hid_b]
    
    toy_model.layers[4].w[hid_w1] += eps
    loss1 = toy_model.forward_pass(toy_xtrain, toy_ytrain)[0]
    print (loss1)
    
    toy_model.layers[4].w[hid_w1] -= (2.0 * eps)
    loss2 = toy_model.forward_pass(toy_xtrain, toy_ytrain)[0]
    print (loss2)
    
    est_grad = (loss1 - loss2) / (2.0 * eps)
    
    toy_model.layers[4].w[hid_w1] = og_weights
    
    toy_model.forward_pass(toy_xtrain, toy_ytrain)[0]
    toy_model.backward_pass()
    
    calc_grad = toy_model.layers[2].d_w[hid_w1]
    
    print (calc_grad)
    print (est_grad)

IndexError: list index out of range