In [2]:
import numpy as np
import pickle, random, csv
random.seed(1026847926404610461)

In [3]:
def load(model_file):
    """
    Loads the network from the model_file
    :param model_file: file onto which the network is saved
    :return: the network
    """
    return pickle.load(open(model_file))

Implementation of fully connected feed forward neural network

In [4]:
class FullyConnectedFeedForwardNN(object):
    """
    Implementation of a Fully connected feed forward Neural Network. 
    This implementation implements only one hidden layer.
    """
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01, activation = 'relu'):
        """
        Initialize the network with input, output sizes, weights and biases
        :param input_dim: input dim
        :param hidden_size: number of hidden units
        :param output_dim: output dim
        :param learning_rate: learning rate alpha
        :param reg_lambda: regularization rate lambda
        :return: None
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.W_xh = np.random.randn(self.hidden_size, self.input_size) * np.sqrt(2/self.input_size) # Weight matrix for input to hidden
        self.W_hy = np.random.randn(self.output_size, self.hidden_size) * np.sqrt(2/self.hidden_size) # Weight matrix for hidden to output
        self.bias_h = np.zeros((self.hidden_size, 1)) # hidden bias
        self.bias_y = np.zeros((self.output_size, 1)) # output bias
        self.learning_rate = learning_rate
        if(activation == 'sigmoid'):
            self.activation = self._sigmoid
            self.activation_derivative = self._sigmoid_derivative
        elif(activation == 'tanh'):
            self.activation = self._tanh
            self.activation_derivative = self._tanh_derivative
        else:
            self.activation = self._relu
            self.activation_derivative = self._relu_derivative
    
    def _relu(self,Z):
        return np.maximum(Z, 0)
    def _tanh(self,Z):
        return np.tanh(Z)
    def sigmoid(self,Z):
        return 1/(1+np.exp(-Z))
    
    def _relu_derivative(self,Z):
        Z[Z<=0] = 0
        Z[Z>0] = 1
        return Z
    def _tanh_derivative(self,Z):
        return (1 - Z * Z)
    def _sigmoid_derivative(self,Z):
        return Z*(1-Z)
    
    def _forward_propagation(self, X):
        """
        Performs forward pass of the ANN
        :param X: input
        :return: hidden activations, softmax probabilities for the output
        """
        #hidden_activations = np.tanh(np.dot(self.W_xh, np.reshape(X,(len(X),1))) + self.bias_h)
        Z = np.dot(self.W_xh, np.reshape(X,(len(X),1))) + self.bias_h
        hidden_activations = self.activation(Z)
        y_s = np.exp(np.dot(self.W_hy, hidden_activations) + self.bias_y)
        prob_values = y_s/np.sum(y_s)
        return hidden_activations, prob_values

    def _update_parameter(self, delta_W_xh, delta_bias_h, delta_W_hy, delta_bias_y):
        """
        Update the weights and biases during gradient descent
        :param dWxh: weight derivative from input to hidden
        :param dbh: bias derivative from input to hidden
        :param dWhy: weight derivative from hidden to output
        :param dby: bias derivative from hidden to output
        :return: None
        """
        self.W_xh += -self.learning_rate * delta_W_xh
        self.bias_h += -self.learning_rate * delta_bias_h
        self.W_hy += -self.learning_rate * delta_W_hy
        self.bias_y += -self.learning_rate * delta_bias_y

    def _back_propagation(self, X, target_class, hidden_activations, prob_values):
        """
        Implementation of the backpropagation algorithm
        :param X: input
        :param t: target
        :param h_a: hidden activation from forward pass
        :param probs: softmax probabilities of output from forward pass
        :return: dWxh, dWhy, dbh, dby
        """
        delta_W_xh, delta_W_hy = np.zeros_like(self.W_xh), np.zeros_like(self.W_hy)
        delta_bias_h, delta_bias_y = np.zeros_like(self.bias_h), np.zeros_like(self.bias_y)
        
        delta_y = np.copy(prob_values)
        delta_y[target_class] -= 1
        delta_W_hy = np.dot(delta_y, hidden_activations.T)
        delta_bias_y += delta_y
        
        delta_h = np.dot(self.W_hy.T, delta_y)  # backprop into h
        delta_h_error = self.activation_derivative(hidden_activations) * delta_h # backprop through tanh nonlinearity
        #delta_h_error = (1 - hidden_activations * hidden_activations) * delta_h # backprop through tanh nonlinearity
        delta_bias_h += delta_h_error
        
        delta_W_xh += np.dot(delta_h_error, np.reshape(X, (len(X), 1)).T)
        return delta_W_xh, delta_W_hy, delta_bias_h, delta_bias_y

    def _calc_smooth_loss(self, loss, len_examples):
        """
        Calculate the smoothened loss over the set of examples
        :param loss: loss calculated for a sample
        :param len_examples: total number of samples in training + validation set
        :param regularizer_type: type of regularizer like L1, L2, Dropout
        :return: smooth loss
        """
        return 1./len_examples * loss

    def train(self, inputs, targets, num_epochs,model_file = "NNModel.pkl"):
        """
        Trains the network by performing forward pass followed by backpropagation
        :param inputs: list of training inputs
        :param targets: list of corresponding training targets
        :param validation_data: tuple of (X,y) where X and y are inputs and targets
        :param num_epochs: number of epochs for training the model
        :param regularizer_type: type of regularizer like L1, L2, Dropout
        :return: None
        """
        for k in range(num_epochs):
            loss = 0
            for i in range(len(inputs)):
                # Forward pass
                hidden_activations, prob_values = self._forward_propagation(inputs[i])
                loss += -np.log(prob_values[targets[i], 0])

                # Backpropagation
                delta_W_xh, delta_W_hy, delta_bias_h, delta_bias_y = self._back_propagation(inputs[i], targets[i], hidden_activations, prob_values)

                # Perform the parameter update with gradient descent
                self._update_parameter(delta_W_xh, delta_bias_h, delta_W_hy, delta_bias_y)

            if k%1 == 0:
                print("Epoch " + str(k) + " : Loss = " + str(self._calc_smooth_loss(loss, len(inputs))))
            
        self.save(model_file)


    def predict(self, X):
        """
        Given an input X, emi
        :param X: test input
        :return: the output class
        """
        hidden_activations, prob_values = self._forward_propagation(X)
        # return probs
        return np.argmax(prob_values)

    def save(self, model_file):
        """
        Saves the network to a file
        :param model_file: name of the file where the network should be saved
        :return: None
        """
        pickle.dump(self, open(model_file, 'wb'))

This is just a test cell, to test the working of NN. Need to remove this in the final version.

In [5]:
nn = FullyConnectedFeedForwardNN(4,8,4)
inputs = []
targets = []
for i in range(1000):
    num = random.randint(0,3)
    inp = np.zeros((4,))
    inp[num] = 1
    inputs.append(inp)
    targets.append(num)

nn.train(inputs, targets, 10)
print(nn.predict([1,0,0,0]))
print(nn.predict([0,1,0,0]))
print(nn.predict([0,0,1,0]))
print(nn.predict([0,0,0,1]))

Epoch 0 : Loss = 0.5354056555242742
Epoch 1 : Loss = 0.060108421946899755
Epoch 2 : Loss = 0.022738730816128705
Epoch 3 : Loss = 0.01335841806943601
Epoch 4 : Loss = 0.009277469210338958
Epoch 5 : Loss = 0.007016084900997115
Epoch 6 : Loss = 0.005594879611682645
Epoch 7 : Loss = 0.004626158258098615
Epoch 8 : Loss = 0.003927156955039262
Epoch 9 : Loss = 0.0034010281391607853
0
1
2
3


Part 2 : Load circles dataset 

In [6]:
file = open("circles500.csv")
index = 0
circles_data = []
for line in file :
    if index == 0:
        index +=1
        continue
    x0,x1,output_class = line.split(',')
    current_row = {}
    inp = np.asarray([float(x0),float(x1)])
    current_row["inp"] = inp
    current_row["out"] = int(output_class)
    circles_data.append(current_row)

Run Neural network model on circles dataset

In [7]:
nn=FullyConnectedFeedForwardNN(2,4,2)
inputs=[]
targets=[]
num_epocs, counter = 50, 0
training_size = int(0.9*len(circles_data))
for i in range(0,len(circles_data)):
    targets.append(circles_data[i]['out'])
    inputs.append(circles_data[i]['inp'])
nn.train(inputs[:training_size], targets[:training_size], num_epocs, model_file="Circles_NN_Model.pkl")
counter=0
for j in range(len(circles_data[training_size:])):
    s=nn.predict(circles_data[j+training_size]['inp'])
    s1=circles_data[j+training_size]['out']
    if s == s1:
        counter+=1
print("Accuracy : ",((counter*1.0)/(j+1))*100)

Epoch 0 : Loss = 0.7025880111488243
Epoch 1 : Loss = 0.608607716845455
Epoch 2 : Loss = 0.5439252338026608
Epoch 3 : Loss = 0.48485546709701666
Epoch 4 : Loss = 0.4290302797227898
Epoch 5 : Loss = 0.37832266108753876
Epoch 6 : Loss = 0.3336297175578382
Epoch 7 : Loss = 0.29581315347690107
Epoch 8 : Loss = 0.26465771315449843
Epoch 9 : Loss = 0.23925130909523612
Epoch 10 : Loss = 0.2185215333214398
Epoch 11 : Loss = 0.20133624843299514
Epoch 12 : Loss = 0.1869104812062117
Epoch 13 : Loss = 0.17461174439755342
Epoch 14 : Loss = 0.16405793488691012
Epoch 15 : Loss = 0.15487270930612346
Epoch 16 : Loss = 0.14684731001162368
Epoch 17 : Loss = 0.13973439589768544
Epoch 18 : Loss = 0.1334004920507077
Epoch 19 : Loss = 0.1276925126569793
Epoch 20 : Loss = 0.12253107544320974
Epoch 21 : Loss = 0.1178447481925892
Epoch 22 : Loss = 0.11356079881164771
Epoch 23 : Loss = 0.10963959796227489
Epoch 24 : Loss = 0.10602096624169757
Epoch 25 : Loss = 0.10262745036971278
Epoch 26 : Loss = 0.0994556163541

Part 3 : Load CIFAR Dataset. 
Will load cat and deer train(all batches) and test samples

In [27]:
folder = "CIFAR_Dataset"
train_files = [folder+"/data_batch_"+str(i) for i in range(1,6) ]
test_file = folder+"/test_batch"
def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))
def Convert_rgb_to_grayscale_and_normalize(image_vector):
    grayscale_vector = []
    individual_spec_length = int(len(image_vector)/3) 
    for i in range(individual_spec_length):
        red_value = image_vector[i]
        green_value = image_vector[i + individual_spec_length]
        blue_value = image_vector[i+ (2*individual_spec_length)]
        # New grayscale image = ( (0.3 * R) + (0.59 * G) + (0.11 * B) ).
        grayscale_value = ((0.3*red_value) + (0.59*green_value) + (0.11*blue_value))
        grayscale_vector.append(grayscale_value)
    return NormalizeData(np.asarray(grayscale_vector))
# This function taken from the CIFAR website
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
deer_samples, cat_samples = {}, {}
deer_samples['train'], cat_samples['train'] = [], []
cat_samples['test'], deer_samples['test'] = [], []
for file in train_files :
    train_data = unpickle(file)
    for i in range(len(train_data[b'labels'])):
        if train_data[b'labels'][i] == 3:
            cat_samples['train'].append(Convert_rgb_to_grayscale_and_normalize(train_data[b'data'][i].tolist()))
        if train_data[b'labels'][i] == 4:
            deer_samples['train'].append(Convert_rgb_to_grayscale_and_normalize(train_data[b'data'][i].tolist()))

test_data = unpickle(test_file)
for i in range(0,len(test_data[b'labels'])):
    if test_data[b'labels'][i] == 3:
        cat_samples['test'].append(Convert_rgb_to_grayscale_and_normalize(test_data[b'data'][i].tolist()))
    if test_data[b'labels'][i] == 4:
        deer_samples['test'].append(Convert_rgb_to_grayscale_and_normalize(test_data[b'data'][i].tolist()))
pickle.dump(cat_samples, open("cat_samples.pkl", "wb"))
pickle.dump(deer_samples, open("deer_samples.pkl", "wb"))

Test CIFAR dataset with neural network

In [29]:
nn=FullyConnectedFeedForwardNN(1024,16,2)
inputs, targets=[], []
num_epocs, counter = 50, 0
for i in range(len(cat_samples['train'])):
    inputs.append(cat_samples['train'][i])
    targets.append(0)
    inputs.append(deer_samples['train'][i])
    targets.append(1)
nn.train(inputs, targets, num_epocs)
for j in range(0,len(deer_samples['test'])):
    s=nn.predict(deer_samples['test'][j])
    if s == 1 :
        counter+=1
    s=nn.predict(cat_samples['test'][j])
    if s == 0 :
        counter+=1
print("Correctly identified test cases : ",counter)
print("Accuracy : ",((counter*1.0)/((j+1)*2))*100)

Epoch 0 : Loss = 0.6481057288194713
Epoch 1 : Loss = 0.6018717754663375
Epoch 2 : Loss = 0.5900084945414481
Epoch 3 : Loss = 0.5827116520897151
Epoch 4 : Loss = 0.5766489593936781
Epoch 5 : Loss = 0.5714321069858236
Epoch 6 : Loss = 0.5663203621348406
Epoch 7 : Loss = 0.5616899022594215
Epoch 8 : Loss = 0.5553569051801397
Epoch 9 : Loss = 0.5505750779053323
Epoch 10 : Loss = 0.5466899040892228
Epoch 11 : Loss = 0.5431942098618681
Epoch 12 : Loss = 0.5378948125562636
Epoch 13 : Loss = 0.5326189396530271
Epoch 14 : Loss = 0.5303578385247281
Epoch 15 : Loss = 0.5257168378796676
Epoch 16 : Loss = 0.5224526811882015
Epoch 17 : Loss = 0.5170895226103492
Epoch 18 : Loss = 0.5134179212020604
Epoch 19 : Loss = 0.5093199379948439
Epoch 20 : Loss = 0.5044218509333056
Epoch 21 : Loss = 0.5006521026013915
Epoch 22 : Loss = 0.498055499285154
Epoch 23 : Loss = 0.49354810416510886
Epoch 24 : Loss = 0.48918365091739263
Epoch 25 : Loss = 0.48376252412561693
Epoch 26 : Loss = 0.48064675852218924
Epoch 27

Part 4: Suruchi Implementation 
    L2 regularizing. Solves the problem of overfitting

In [35]:
class FullyConnectedNNWithRegulization(object):
    """
    Implementation of a Fully connected feed forward Neural Network. 
    This implementation implements only one hidden layer.
    """
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01, reg_lambda=0.01):
        """
        Initialize the network with input, output sizes, weights and biases
        :param input_dim: input dim
        :param hidden_size: number of hidden units
        :param output_dim: output dim
        :param learning_rate: learning rate alpha
        :param reg_lambda: regularization rate lambda
        :return: None
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.W_xh = np.random.randn(self.hidden_size, self.input_size) * np.sqrt(2/self.input_size) # Weight matrix for input to hidden
        self.W_hy = np.random.randn(self.output_size, self.hidden_size) * np.sqrt(2/self.hidden_size) # Weight matrix for hidden to output
        self.bias_h = np.zeros((self.hidden_size, 1)) # hidden bias
        self.bias_y = np.zeros((self.output_size, 1)) # output bias
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda

    def _forward_propagation(self, X):
        """
        Performs forward pass of the ANN
        :param X: input
        :return: hidden activations, softmax probabilities for the output
        """
        hidden_activations = np.tanh(np.dot(self.W_xh, np.reshape(X,(len(X),1))) + self.bias_h)
        y_s = np.exp(np.dot(self.W_hy, hidden_activations) + self.bias_y)
        prob_values = y_s/np.sum(y_s)
        return hidden_activations, prob_values
    
    def _regularize_weights(self, delta_W_hy, delta_W_xh, W_hy, W_xh):
        """
        Add regularization terms to the weights
        :param dWhy: weight derivative from hidden to output
        :param dWxh: weight derivative from input to hidden
        :param Why: weights from hidden to output
        :param Wxh: weights from input to hidden
        :return: dWhy, dWxh
        """
        delta_W_hy += self.reg_lambda * W_hy
        delta_W_xh += self.reg_lambda * W_xh
        return delta_W_hy, delta_W_xh

    def _update_parameter(self, delta_W_xh, delta_bias_h, delta_W_hy, delta_bias_y):
        """
        Update the weights and biases during gradient descent
        :param dWxh: weight derivative from input to hidden
        :param dbh: bias derivative from input to hidden
        :param dWhy: weight derivative from hidden to output
        :param dby: bias derivative from hidden to output
        :return: None
        """
        self.W_xh += -self.learning_rate * delta_W_xh
        self.bias_h += -self.learning_rate * delta_bias_h
        self.W_hy += -self.learning_rate * delta_W_hy
        self.bias_y += -self.learning_rate * delta_bias_y

    def _back_propagation(self, X, target_class, hidden_activations, prob_values):
        """
        Implementation of the backpropagation algorithm
        :param X: input
        :param t: target
        :param h_a: hidden activation from forward pass
        :param probs: softmax probabilities of output from forward pass
        :return: dWxh, dWhy, dbh, dby
        """
        delta_W_xh, delta_W_hy = np.zeros_like(self.W_xh), np.zeros_like(self.W_hy)
        delta_bias_h, delta_bias_y = np.zeros_like(self.bias_h), np.zeros_like(self.bias_y)
        
        delta_y = np.copy(prob_values)
        delta_y[target_class] -= 1
        delta_W_hy = np.dot(delta_y, hidden_activations.T)
        delta_bias_y += delta_y
        
        delta_h = np.dot(self.W_hy.T, delta_y)  # backprop into h
        delta_h_error = (1 - hidden_activations * hidden_activations) * delta_h # backprop through tanh nonlinearity
        delta_bias_h += delta_h_error
        
        delta_W_xh += np.dot(delta_h_error, np.reshape(X, (len(X), 1)).T)
        return delta_W_xh, delta_W_hy, delta_bias_h, delta_bias_y

    def _calc_smooth_loss(self, loss, len_samples, regularization=None):
        """
        Calculate the smoothened loss over the set of examples
        :param loss: loss calculated for a sample
        :param len_examples: total number of samples in training + validation set
        :param regularizer_type: type of regularizer like L1, L2, Dropout
        :return: smooth loss
        """
        if regularization == 'L2':
            # Add regulatization term to loss
            loss += self.reg_lambda/2 * (np.sum(np.square(self.W_xh)) + np.sum(np.square(self.W_hy)))
            return 1./len_samples * loss
        else:
            return 1./len_samples * loss

    def train(self, inputs, targets, sample_data, num_epochs,model_file = "NNModel.pkl", regularization=None):
        """
        Trains the network by performing forward pass followed by backpropagation
        :param inputs: list of training inputs
        :param targets: list of corresponding training targets
        :param validation_data: tuple of (X,y) where X and y are inputs and targets
        :param num_epochs: number of epochs for training the model
        :param regularizer_type: type of regularizer like L1, L2, Dropout
        :return: None
        """
        for k in range(num_epochs):
            loss = 0
            for i in range(len(inputs)):
                # Forward pass
                hidden_activations, prob_values = self._forward_propagation(inputs[i])
                loss += -np.log(prob_values[targets[i], 0])

                # Backpropagation
                delta_W_xh, delta_W_hy, delta_bias_h, delta_bias_y = self._back_propagation(inputs[i], targets[i], hidden_activations, prob_values)

                # Perform the parameter update with gradient descent
                self._update_parameter(delta_W_xh, delta_bias_h, delta_W_hy, delta_bias_y)
            
            for i in range(len(sample_data[0])):
                # Forward pass
                hidden_activations, prob_values = self._forward_propagation(sample_data[0][i])
                loss += -np.log(prob_values[sample_data[1][i], 0])

                # Backpropagation
                delta_W_xh, delta_W_hy, delta_bias_h, delta_bias_y = self._back_propagation(sample_data[0][i], sample_data[1][i], hidden_activations, prob_values)

                if regularization == 'L2':
                    delta_W_hy, delta_W_xh = self._regularize_weights(delta_W_hy, delta_W_xh, self.W_hy, self.W_xh)

                # Perform the parameter update with gradient descent
                self._update_parameter(delta_W_xh, delta_bias_h, delta_W_hy, delta_bias_y)
            
            if k%1 == 0:
                print("Epoch " + str(k) + " : Loss = " + str(self._calc_smooth_loss(loss, len(inputs), regularization)))
            
        self.save(model_file)


    def predict(self, X):
        """
        Given an input X, emi
        :param X: test input
        :return: the output class
        """
        hidden_activations, prob_values = self._forward_propagation(X)
        # return probs
        return np.argmax(prob_values)

    def save(self, model_file):
        """
        Saves the network to a file
        :param model_file: name of the file where the network should be saved
        :return: None
        """
        pickle.dump(self, open(model_file, 'wb'))

In [36]:
nn = FullyConnectedNNWithRegulization(4,8,4)
inputs = []
targets = []
for i in range(1000):
    num = random.randint(0,3)
    inp = np.zeros((4,))
    inp[num] = 1
    inputs.append(inp)
    targets.append(num)

nn.train(inputs[:800], targets[:800], (inputs[800:], targets[800:]), 10, regularization='L2')
print(nn.predict([1,0,0,0]))
print(nn.predict([0,1,0,0]))
print(nn.predict([0,0,1,0]))
print(nn.predict([0,0,0,1]))

Epoch 0 : Loss = 0.6069727887784129
Epoch 1 : Loss = 0.09155663529892479
Epoch 2 : Loss = 0.047123291175041535
Epoch 3 : Loss = 0.03277604691604552
Epoch 4 : Loss = 0.025917723277330832
Epoch 5 : Loss = 0.02198762202178568
Epoch 6 : Loss = 0.019488991255837185
Epoch 7 : Loss = 0.017791221551063174
Epoch 8 : Loss = 0.016583888749264697
Epoch 9 : Loss = 0.015696866050650866
0
1
2
3


In [None]:
nn=FullyConnectedNNWithRegulization(1024,16,2)
inputs, targets=[], []
num_epocs, counter = 50, 0
for i in range(len(cat_samples['train'])):
    inputs.append(cat_samples['train'][i])
    targets.append(0)
    inputs.append(deer_samples['train'][i])
    targets.append(1)
training_size = int(0.8*len(targets))
validation_size = int(0.2*len(targets))
nn.train(inputs[:training_size], targets[:training_size],(inputs[training_size:],targets[training_size:]) ,num_epocs)
for j in range(0,len(deer_samples['test'])):
    s=nn.predict(deer_samples['test'][j])
    if s == 1 :
        counter+=1
    s=nn.predict(cat_samples['test'][j])
    if s == 0 :
        counter+=1
print("Correctly identified test cases : ",counter)
print("Accuracy : ",((counter*1.0)/((j+1)*2))*100)