In [1]:
#importing libraries
import numpy as np
import helper

In [2]:
# Data preprocessing: obtain the dataset through the project-local `helper` module
# and split it into image/label arrays for training and testing.
# NOTE(review): `helper` is not visible here — presumably download_dataset() fetches the
# data to local storage and load_images_labels() returns (train+validation, test) datasets; confirm.
helper.download_dataset()
train_val_dataset, test_dataset = helper.load_images_labels()
# Each dataset is unpacked into an (images, labels) pair used by the cells below.
train_images, train_labels = helper.extract_images_labels(train_val_dataset)
test_images, test_labels = helper.extract_images_labels(test_dataset)

In [3]:
class NeuralNetwork:
    """Fully connected network with two hidden layers and sigmoid activations on every layer.

    The model is held in three weight matrices (input -> hidden 1, hidden 1 -> hidden 2,
    hidden 2 -> output); there are no bias terms. Training follows the gradient of the
    squared error between the sigmoid outputs and the targets.
    """

    def __init__(self, inputs, hidden_layer1, hidden_layer2, outputs, batch_size=10000, learning_rate=0.1, epochs=100):
        """Store layer sizes and hyper-parameters and draw the initial weights.

        Args:
            inputs: number of input nodes (features per record).
            hidden_layer1: number of nodes in the first hidden layer.
            hidden_layer2: number of nodes in the second hidden layer.
            outputs: number of output nodes.
            batch_size: records per mini batch used by ``train_batches``.
            learning_rate: step size applied to the batch-averaged gradient.
            epochs: number of passes over the training data.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1, got {0}".format(batch_size))

        self.input_nodes = inputs
        self.hidden_layer1_nodes = hidden_layer1
        self.hidden_layer2_nodes = hidden_layer2
        self.output_nodes = outputs

        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epochs = epochs

        # Validation data is attached by run(); until then progress reports
        # only contain the training error.
        self.validation_features = None
        self.validation_targets = None

        # Zero-mean normal initialisation with standard deviation fan_in ** -0.5.
        self.weights_input_to_hidden = np.random.normal(
            0.0, self.input_nodes**-0.5, (self.input_nodes, self.hidden_layer1_nodes))
        self.weights_hidden_to_hidden = np.random.normal(
            0.0, self.hidden_layer1_nodes**-0.5, (self.hidden_layer1_nodes, self.hidden_layer2_nodes))
        self.weights_hidden_to_output = np.random.normal(
            0.0, self.hidden_layer2_nodes**-0.5, (self.hidden_layer2_nodes, self.output_nodes))

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)), applied element-wise."""
        return 1/(1 + np.exp(-x))

    def sigmoid_prime(self, x):
        """Return the derivative of the sigmoid at ``x``: sig(x) * (1 - sig(x))."""
        activation = self.sigmoid(x)  # evaluate the sigmoid once instead of twice
        return activation * (1 - activation)

    def _train_step(self, images, targets):
        """Run one forward/backward pass over ``images`` and update the weights.

        The summed gradient is averaged over the number of records actually
        processed in this step, so the effective learning rate does not depend
        on how the data is split into batches.

        Returns:
            The network outputs computed before the weight update.

        Raises:
            ValueError: if ``images`` is empty (averaging would divide by zero
                and fill the weights with NaN).
        """
        num_samples = len(images)
        if num_samples == 0:
            raise ValueError("cannot train on an empty set of records")

        delta_weights_input_to_hidden = np.zeros(self.weights_input_to_hidden.shape)
        delta_weights_hidden_to_hidden = np.zeros(self.weights_hidden_to_hidden.shape)
        delta_weights_hidden_to_output = np.zeros(self.weights_hidden_to_output.shape)

        hidden1_in, hidden1_out, hidden2_in, hidden2_out, output_in, final_outputs = self.forward_pass(images)

        delta_weights_input_to_hidden, delta_weights_hidden_to_hidden, delta_weights_hidden_to_output = \
            self.backpropagation(images, targets, hidden1_in, hidden1_out, hidden2_in, hidden2_out,
                                 output_in, final_outputs,
                                 delta_weights_input_to_hidden, delta_weights_hidden_to_hidden,
                                 delta_weights_hidden_to_output)

        self.update_weights(delta_weights_input_to_hidden, delta_weights_hidden_to_hidden,
                            delta_weights_hidden_to_output, num_samples)
        return final_outputs

    def _report_progress(self, epoch, final_outputs, targets):
        """Print the training error and, when a validation set is attached, validation stats."""
        train_error = self.calculate_error(final_outputs, targets)
        if self.validation_features is None or self.validation_targets is None:
            # train_* was called directly, without run(): nothing to validate against.
            print("Epoch {0}, Training Error {1:0.3f}".format(epoch, train_error))
            return

        validation_error, validation_accuracy = self.calculate_validation_stats()
        print("Epoch {0}, Training Error {1:0.3f}, Validation Error {2:0.3f}, Validation Accuracy {3:0.3f}".format(
            epoch, train_error, validation_error, validation_accuracy))

    def train_complete(self, images, targets):
        """Train with full-batch gradient descent: one weight update per epoch.

        Args:
            images: array of shape (n_records, input_nodes).
            targets: array expected to match the output shape (n_records, output_nodes).
        """
        for epoch in range(self.epochs):
            final_outputs = self._train_step(images, targets)
            self._report_progress(epoch, final_outputs, targets)

    def train_batches(self, images, targets):
        """Train with mini batches of ``self.batch_size`` records.

        The weights are updated, and a progress line is printed, once per batch.
        The last batch of an epoch may be smaller than ``batch_size``.

        Args:
            images: array of shape (n_records, input_nodes).
            targets: array expected to match the output shape (n_records, output_nodes).
        """
        num_records = len(images)
        for epoch in range(self.epochs):
            for start in range(0, num_records, self.batch_size):
                batch_images = images[start: start + self.batch_size]
                batch_targets = targets[start: start + self.batch_size]

                final_outputs = self._train_step(batch_images, batch_targets)
                self._report_progress(epoch, final_outputs, batch_targets)

    def forward_pass(self, features):
        """Propagate ``features`` through the network.

        Args:
            features: array of shape (n_records, input_nodes).

        Returns:
            A 6-tuple ``(hidden1_in, hidden1_out, hidden2_in, hidden2_out,
            output_in, final_outputs)``: the pre-activation and sigmoid
            activation of each of the three layers, in order.
        """
        hidden1_in = np.dot(features, self.weights_input_to_hidden)
        hidden1_out = self.sigmoid(hidden1_in)

        hidden2_in = np.dot(hidden1_out, self.weights_hidden_to_hidden)
        hidden2_out = self.sigmoid(hidden2_in)

        output_in = np.dot(hidden2_out, self.weights_hidden_to_output)
        final_outputs = self.sigmoid(output_in)

        return hidden1_in, hidden1_out, hidden2_in, hidden2_out, output_in, final_outputs

    def backpropagation(self, features, labels, weights_hidden_layer1_in, weights_hidden_layer1_out, weights_hidden_layer2_in, \
                              weights_hidden_layer2_out, output_layer_in, final_outputs, \
                              delta_weights_input_to_hidden, delta_weights_hidden_to_hidden, delta_weights_hidden_to_output):
        """Accumulate the weight changes for one batch into the three ``delta_*`` arrays.

        The output error is ``labels - final_outputs``. Each layer's error term is
        its error multiplied by the derivative of that layer's activation. For a
        sigmoid layer with activation ``z = sig(y)`` the derivative is
        ``sig(y) * (1 - sig(y))``, which equals ``z * (1 - z)``; the hidden layers
        use that second form so the cached activations are reused.

        Args:
            features: batch inputs, shape (n_records, input_nodes).
            labels: batch targets, expected to match ``final_outputs`` in shape.
            weights_hidden_layer1_in: pre-activation of hidden layer 1 (unused,
                kept so the signature mirrors the ``forward_pass`` result).
            weights_hidden_layer1_out: sigmoid activation of hidden layer 1.
            weights_hidden_layer2_in: pre-activation of hidden layer 2 (unused,
                kept for the same reason).
            weights_hidden_layer2_out: sigmoid activation of hidden layer 2.
            output_layer_in: pre-activation of the output layer.
            final_outputs: sigmoid activation of the output layer.
            delta_weights_input_to_hidden: accumulator, updated in place.
            delta_weights_hidden_to_hidden: accumulator, updated in place.
            delta_weights_hidden_to_output: accumulator, updated in place.

        Returns:
            The three accumulators, in the order they were passed in.
        """
        error = labels - final_outputs
        output_error_term = error * self.sigmoid_prime(output_layer_in)

        # Push the error back through the hidden2 -> output weights.
        hidden_layer2_error = np.dot(output_error_term, self.weights_hidden_to_output.T)
        hidden_layer2_error_term = hidden_layer2_error * weights_hidden_layer2_out * (1 - weights_hidden_layer2_out)

        # Push the error back through the hidden1 -> hidden2 weights.
        hidden_layer1_error = np.dot(hidden_layer2_error_term, self.weights_hidden_to_hidden.T)
        hidden_layer1_error_term = hidden_layer1_error * weights_hidden_layer1_out * (1 - weights_hidden_layer1_out)

        # Sum over the batch: (layer input).T @ (error term of the layer it feeds).
        delta_weights_input_to_hidden += np.dot(features.T, hidden_layer1_error_term)
        delta_weights_hidden_to_hidden += np.dot(weights_hidden_layer1_out.T, hidden_layer2_error_term)
        delta_weights_hidden_to_output += np.dot(weights_hidden_layer2_out.T, output_error_term)

        return delta_weights_input_to_hidden, delta_weights_hidden_to_hidden, delta_weights_hidden_to_output

    def update_weights(self, delta_weights_input_to_hidden, delta_weights_hidden_to_hidden, delta_weights_hidden_to_output, num_records):
        """Apply the accumulated deltas, averaged over ``num_records`` and scaled by the learning rate."""
        step = self.learning_rate / num_records
        self.weights_input_to_hidden += step * delta_weights_input_to_hidden
        self.weights_hidden_to_hidden += step * delta_weights_hidden_to_hidden
        self.weights_hidden_to_output += step * delta_weights_hidden_to_output

    def calculate_error(self, final_outputs, labels):
        """Return the squared error summed over all outputs, divided by the number of records."""
        squared_error = (labels - final_outputs)**2
        return np.sum(squared_error)/labels.shape[0]

    def calculate_validation_stats(self, confidence_threshold=0.8):
        """Evaluate the network on the attached validation set.

        A record counts as correct only when the thresholded prediction row
        equals the integer-cast target row element by element. A prediction
        entry is 1 when it is the row maximum and at least
        ``confidence_threshold``, otherwise 0. Comparing rows element-wise
        matters: summing the differences per row would cancel out for a
        confident prediction of the wrong class.

        Args:
            confidence_threshold: minimum output value for the winning class
                to be accepted as a prediction.

        Returns:
            ``(mean_validation_error, validation_accuracy)``.
        """
        validation_results = self.test(self.validation_features)
        mean_validation_error = self.calculate_error(validation_results, self.validation_targets)

        is_row_max = validation_results == validation_results.max(axis=1)[:, None]
        is_confident = validation_results >= confidence_threshold
        predicted = (is_row_max & is_confident).astype(int)

        labels_int = self.validation_targets.astype(int)
        num_records = self.validation_targets.shape[0]
        num_errors = np.count_nonzero(np.any(predicted != labels_int, axis=1))
        validation_accuracy = (num_records - num_errors) / num_records

        return mean_validation_error, validation_accuracy

    def test(self, features):
        """Return the network outputs for ``features`` (forward pass only, no learning)."""
        return self.forward_pass(features)[-1]

    def run(self, features, targets, use_mini_batches=True):
        """Hold out 20% of the data for validation, train, and print the elapsed time.

        Args:
            features: training inputs, shape (n_records, input_nodes).
            targets: training targets, one row per record.
            use_mini_batches: train with ``train_batches`` when true (the
                default), otherwise with ``train_complete``.
        """
        # Imported locally, as in the original notebook, so the class definition
        # itself does not require scikit-learn.
        import timeit

        from sklearn.model_selection import train_test_split
        X_train, X_val, y_train, y_val = train_test_split(features, targets, test_size=0.2, random_state=42)

        self.validation_features = X_val
        self.validation_targets = y_val

        # default_timer() reads a clock; timeit.timeit() would instead benchmark
        # a `pass` statement and return that duration.
        start = timeit.default_timer()
        if use_mini_batches:
            self.train_batches(X_train, y_train)
            mode = "mini batches"
        else:
            self.train_complete(X_train, y_train)
            mode = "Complete batch"
        end = timeit.default_timer()
        print("Time for training Neural Network using {0}: {1}".format(mode, end - start))
        
       

In [4]:
# Build the network: 3072 input nodes, hidden layers of 128 and 32 nodes, 10 output nodes.
# Batch size, learning rate and epoch count keep the constructor defaults.
# NOTE(review): 3072 presumably matches flattened 32x32x3 images — confirm against helper.
network = NeuralNetwork(3072, 128, 32, 10)

In [None]:
# Train the network. run() holds out 20% of the given records for validation,
# trains with mini batches, and prints progress lines followed by a timing line.
network.run(train_images, train_labels)

In [None]:
# Evaluate on the test set: forward pass only, then the squared error summed over
# all outputs and divided by the number of test records.
pred_outputs = network.test(test_images)
error = network.calculate_error(pred_outputs, test_labels)
print(error)