In [28]:
import numpy as np
from numpy import random
from matplotlib import pyplot as plt
%matplotlib inline

In [9]:
def unpickle(file):
    """Load a single pickled object from ``file`` and return it.

    file: path of the pickle file to read (opened in binary mode).

    The object is decoded with ``encoding='bytes'``, so 8-bit strings written
    by Python 2 come back as ``bytes``; this is why the notebook indexes the
    result with byte-string keys such as ``b'data'`` and ``b'labels'``.

    NOTE(security): ``pickle.load`` can execute arbitrary code contained in
    the file. Only call this on files from a trusted source.
    """
    import pickle
    with open(file, 'rb') as fo:
        # Avoid naming the result ``dict``: that would shadow the builtin.
        contents = pickle.load(fo, encoding='bytes')
    return contents

# Load the five training batches; each unpickled batch is indexed below with
# the byte-string keys b'data' and b'labels'.
batch_1, batch_2, batch_3, batch_4, batch_5 = (
    unpickle(batch_path)
    for batch_path in (
        'cifar-10-batches-py/data_batch_1',
        'cifar-10-batches-py/data_batch_2',
        'cifar-10-batches-py/data_batch_3',
        'cifar-10-batches-py/data_batch_4',
        'cifar-10-batches-py/data_batch_5',
    )
)

# Split every batch into its feature rows and its labels.
b1_data, b1_labels = batch_1[b'data'], batch_1[b'labels']
b2_data, b2_labels = batch_2[b'data'], batch_2[b'labels']
b3_data, b3_labels = batch_3[b'data'], batch_3[b'labels']
b4_data, b4_labels = batch_4[b'data'], batch_4[b'labels']
b5_data, b5_labels = batch_5[b'data'], batch_5[b'labels']

In [16]:
import numpy as np
# Format the data
#
# Only the first batch (b1_data / b1_labels) is turned into training data here.

numdata = len(b1_data)

# allx: one np.matrix per sample, divided by 255.0 (presumably 8-bit pixel
# values, giving features in [0, 1] -- TODO confirm). np.matrix makes each row
# two-dimensional, so a later `.T` yields the column vector the network expects.
allx = [np.matrix(b1_data[idx]) / 255.0 for idx in range(numdata)]

# ally: one-hot targets, one list of 10 entries per sample with a 1 at the
# position given by the sample's label.
ally = [[0] * 10 for _ in range(numdata)]
for idx in range(numdata):
    ally[idx][b1_labels[idx]] = 1

In [24]:
class Layer(object):
    """
    A layer of sigmoid neurons described by a weight matrix and a bias.

    Besides the parameters, a layer carries four lists (``bp2_gd``, ``bp3_gd``,
    ``bp4_gd``, ``a_gd``) that start empty and are filled by the network while
    it processes samples, for later use in gradient descent.
    """

    def __init__(self, *args):
        """
        args[0]: weights of the layer
        args[1]: bias of the layer
        """
        weight_matrix, bias_vector = args[0], args[1]
        self.weights = weight_matrix
        self.bias = bias_vector
        # gd: per-sample values collected for gradient descent
        self.bp2_gd, self.bp3_gd, self.bp4_gd, self.a_gd = [], [], [], []

    def __call__(self, x):
        """
        Evaluate the layer on input ``x``.

        Stores and returns ``(z, a)`` where ``z`` is the weighted sum plus
        bias and ``a`` is the sigmoid of ``z``.

        x: inputs to the layer
        """
        pre_activation = self.basis_function(self.weights, self.bias, x)
        self.z = pre_activation
        self.a = self.activation_function(self.sigmoid, pre_activation)
        return (self.z, self.a)

    def basis_function(self, w, b, x):
        """Return the affine combination ``w.T . x + b``."""
        weighted_sum = w.T.dot(x)
        return weighted_sum + b

    def sigmoid(self, a):
        """Return the logistic function ``1 / (1 + exp(-a))`` of ``a``."""
        decay = np.exp(-a)
        return 1 / (1 + decay)

    def activation_function(self, fun, z):
        """Apply the activation ``fun`` to ``z`` and return the result."""
        activated = fun(z)
        return activated

In [25]:
class Network(object):
    """A Network contains a list of layers, and functions to do feedforward and backpropagation.

    Typical use for one batch: for every sample call ``feed_forward``,
    ``back_prop1`` .. ``back_prop4`` (these append per-sample values to the
    ``*_gd`` lists), then call ``gradient_descent`` once to update all layers,
    and finally ``refresh_network`` to clear the accumulated values.
    """

    def __init__(self, *args):
        """
        args[0]: a list of number of neurons in each layer, input layer first;
                 i.e. [2, 2, 1] means that there are 2 neurons in the input
                 layer and 2 in the first and 1 in the second layer
        args[1]: features to be trained
        args[2]: results for comparison
        """
        self.layers = []
        self.neuron_nums = args[0]
        # x: features to be trained
        self.x = args[1]
        # N: number of training data
        self.N = len(self.x)
        # t: result for comparison
        self.t = args[2]
        # gd: output-layer errors collected per sample for gradient descent
        self.bp1_gd = []
        # squared error of each processed sample / mean error per training call
        self.error = []
        self.error_epoch = []

    def initialize_layers(self, neuron_nums, N):
        """Create one layer for every consecutive pair of entries in ``neuron_nums``."""
        for i in range(len(neuron_nums) - 1):
            self.initialize_layer(N, neuron_nums[i], neuron_nums[i+1])

    def initialize_layer(self, N, prev, curr, mu=0):
        """
        Initializes weights and bias for current layer and appends the layer.

        N: number of training data
        prev: number of neurons in previous layer
        curr: number of neurons in current layer
        mu: mean of the normal distribution the parameters are drawn from

        sigma = 1 / sqrt(N) in order to avoid network saturation
        """
        sigma = 1 / np.sqrt(N)

        # `random` is numpy.random (imported at the top of the notebook).
        # All parameters come from one call; row c holds neuron c's bias
        # followed by its `prev` incoming weights.
        draws = random.normal(mu, sigma, size=(curr, prev + 1))
        b = draws[:, :1].copy()   # shape (curr, 1)
        W = draws[:, 1:].copy()   # shape (curr, prev)

        # The layer stores the weights transposed, i.e. with shape (prev, curr).
        layer = Layer(W.T, b)
        self.layers.append(layer)

    def feed_forward(self, layers, x, counter):
        """
        Start off with the first layer, where the input is x and the counter is 0.
        Then we increase the counter and move to the next layer.
        The output from the previous layer will become the input for the next layer.
        The activation of the last layer is stored in ``self.output``.

        layers: list of layers in the network
        x: input to the layer
        counter: keep track of which layer we are in
        """
        layer = layers[counter] # get the current layer we are in
        z, a = layer(x) # calculate the value of a and z

        # checks whether the number of z's is the same as the number of neurons in the layer
        assert z.shape == (self.neuron_nums[counter+1], 1)
        assert a.shape == (self.neuron_nums[counter+1], 1)

        layer.z = z # save the current value of z in the layer
        layer.a = a # save the current value of a in the layer
        layer.a_gd.append(a.T) # keep the activation (as a row) for gradient descent
        if (counter==len(layers)-1):
            self.output = a
            return # if we have reached the last layer, return the output
        else:
            self.feed_forward(layers, layer.a, counter+1) # else keep feeding the result forward

    def back_prop1(self, layers, t):
        """
        Compute error of last layer.

        Stores ``(a - t) * sigmoid'(z)`` in ``self.bp1`` and appends it to
        ``self.bp1_gd``. Also appends to ``self.error`` the squared difference
        between 1 and the output at the position of the largest target entry.

        layers: list of layers in the network
        t: target for the current sample (reshaped to the output's shape)
        """
        last_layer = layers[len(layers) - 1]
        a = last_layer.a
        z = last_layer.z
        t = t.reshape(a.shape)
        idx = np.argmax(t)
        self.error.append((a[idx] - 1)**2)
        assert a.shape==t.shape
        s = last_layer.sigmoid(z)  # evaluated once; s * (1 - s) is the sigmoid derivative
        self.bp1 = np.multiply((a - t), np.multiply(s, (1 - s)))
        assert self.bp1.shape == (self.neuron_nums[len(layers)], 1)
        self.bp1_gd.append(self.bp1)

    def back_prop2(self, layers, counter, bp1):
        """
        Backpropagating the error from layer ``counter + 1`` to layer
        ``counter`` and, recursively, down to the first layer.

        layers: list of layers in the network
        counter: index of the layer whose error is computed
        bp1: error of the next layer (layer ``counter + 1``)
        """
        if (counter<0):
            return

        # Setting up the parameters:
        #       From the current layer: z, sigmoid
        #       From the next layer: w, error
        current_layer = layers[counter]
        next_layer = layers[counter+1]
        z = current_layer.z
        w = next_layer.weights.T
        assert w.shape == (self.neuron_nums[counter+2], self.neuron_nums[counter+1])

        # Calculating the error for that layer
        s = current_layer.sigmoid(z)  # evaluated once; s * (1 - s) is the sigmoid derivative
        current_layer.bp2 = np.multiply(w.T.dot(bp1), (np.multiply(s, (1 - s))))
        current_layer.bp2_gd.append(current_layer.bp2)
        assert current_layer.bp2.shape == (self.neuron_nums[counter+1], 1)

        # Recurse and pass the error back
        self.back_prop2(layers, counter-1, current_layer.bp2)

    # Computing the Gradients back_prop3 and back_prop4

    def back_prop3(self, layers, x, counter):
        """
        Compute the weight gradient of layer ``counter`` for the current sample
        (layer error times the transposed input of that layer), store it in
        ``bp3`` / ``bp3_gd`` and recurse towards the first layer.

        layers: list of layers in the network
        x: input of the network for the current sample
        counter: index of the layer to process
        """
        if (counter<0):
            return

        # Setting up the parameters
        current_layer = layers[counter]
        if (counter==0): # if it is the first layer, 'a' comes from the input layer
            a = x
        else:
            prev_layer = layers[counter-1]
            a = prev_layer.a

        if (counter == len(layers) - 1): # if it is the last layer, get bp1
            bp2 = self.bp1
        else:
            bp2 = current_layer.bp2

        current_layer.bp3 = bp2.dot(a.T)
        current_layer.bp3_gd.append(current_layer.bp3)
        assert current_layer.bp3.shape == (self.neuron_nums[counter+1], self.neuron_nums[counter])
        self.back_prop3(layers, x, counter-1)

    def back_prop4(self, layers, counter):
        """
        Record the bias gradient of layer ``counter`` (equal to the layer's
        error) in ``bp4`` / ``bp4_gd`` and recurse towards the first layer.
        """
        if (counter < 0):
            return
        current_layer = layers[counter]
        if (counter == len(layers) - 1):
            current_layer.bp4 = self.bp1 # the last layer's error lives on the network
        else:
            current_layer.bp4 = current_layer.bp2
        current_layer.bp4_gd.append(current_layer.bp4)
        self.back_prop4(layers, counter-1)

    def __call__(self, x, t):
        """ Calling the network will ask it to predict """
        self.feed_forward(self.layers, x, 0)
        print (self.output)
        # Two-class read-out: anything but output index 0 is reported as 1.
        if (np.argmax(self.output)==0):
            print("Prediction: 0")
        else:
            print("Prediction: 1")
        print("Ground Truth: %s" % t)

    def gradient_descent(self, layers, x, counter, alpha, steps=50):
        """
        Update weights and bias of layer ``counter`` from the values
        accumulated in the ``*_gd`` lists, then recurse towards the first layer.

        layers: list of layers in the network
        x: the samples that were fed forward, in the same order (used as the
           input of the first layer)
        counter: index of the layer to update
        alpha: refers to the rate of learning
        steps: the number of times the (unchanged) update is applied

        NOTE(review): the step is scaled by ``alpha / self.N`` where N is the
        size of the data handed to the constructor, not the number of
        accumulated samples -- confirm this is intended for mini-batches.
        """
        if (counter < 0):
            return

        # Setting up the Parameters
        current_layer = layers[counter]
        if (counter==0):
            a = x
        else:
            prev_layer = layers[counter-1]
            a = prev_layer.a_gd

        if (counter == len(layers) - 1): # if it is the last layer, get bp1
            bp2_gd = self.bp1_gd
        else:
            bp2_gd = current_layer.bp2_gd

        # The accumulated gradients do not change between steps, so they are
        # summed once here instead of once per step.
        rate = alpha / self.N
        bias_step = rate * sum(bp2_gd)
        learning_weights = sum(bp2_gd[n].dot(a[n]) for n in range(len(bp2_gd))).T
        weight_step = rate * learning_weights
        assert current_layer.weights.shape == weight_step.shape

        # Actual calculation for Gradient Descent
        for _ in range(steps):
            current_layer.bias = current_layer.bias - bias_step
            current_layer.weights = current_layer.weights - weight_step

        self.gradient_descent(layers, x, counter-1, alpha, steps)

    def refresh_network(self):
        """Clear everything accumulated per sample (``error_epoch`` is kept)."""
        layers = self.layers
        for l in layers:
            l.bp2_gd = []
            l.bp3_gd = []
            l.bp4_gd = []
            l.a_gd = []
        self.bp1_gd = []
        self.error = []

In [26]:
class MNISTNetwork(Network):
    """A Network for multi-class targets that reports the predicted class
    index and counts correct predictions."""

    def __init__(self, *args):
        """
        Takes the same arguments as Network:

        args[0]: a list of number of neurons in each layer
                 i.e. [2, 2, 1] means that there are 2 neurons in the input
                 layer and 2 in the first and 1 in the second layer
        args[1]: features to be trained
        args[2]: results for comparison
        """
        # Reuse the base initialisation instead of repeating it here.
        super(MNISTNetwork, self).__init__(*args)
        # number of calls whose prediction matched the ground truth
        self.correct_predictions = 0

    def __call__(self, x, t):
        """ Calling the network will ask it to predict """
        self.feed_forward(self.layers, x, 0)
        # Both the prediction and the target are read as the index of the largest entry.
        prediction = np.argmax(self.output)
        ground_truth = np.argmax(t)
        print ("Prediction: %s" % prediction)
        print ("Ground Truth: %s" % ground_truth)
        if (prediction==ground_truth):
            self.correct_predictions += 1

In [None]:
from random import randint
# Number of samples handed to each training call.
batch_size = 100

# Topology: 3072 input features, three hidden layers, 10 outputs (one per
# entry of the one-hot targets).
neuron_nums = [3072, 300, 200, 150, 10]

# Build the network on the formatted data and draw the initial parameters;
# the network stores `neuron_nums` unchanged, so it is passed on directly.
cifar10_network = MNISTNetwork(neuron_nums, allx, ally)
cifar10_network.initialize_layers(neuron_nums, cifar10_network.N)

def train(x, t, neuron_nums, alpha=0.5):
    """Run one batch through the global ``cifar10_network`` and update it.

    Every sample is fed forward and backpropagated (which accumulates the
    per-sample values inside the network); afterwards a single
    ``gradient_descent`` call updates all layers and the mean of the collected
    sample errors is appended to ``cifar10_network.error_epoch``.

    x: batch inputs; ``x[i].T`` is fed to the network
    t: batch targets, ``t[i]`` belongs to ``x[i]``
    neuron_nums: unused, kept so existing calls keep working
    alpha: learning rate passed to ``gradient_descent``
    """
    net = cifar10_network
    last = len(net.layers) - 1
    for i in range(len(x)):
        # Take the sample from the batch `x`, not from the network's stored
        # full data set, so the input lines up with `t[i]` and with the `x`
        # handed to gradient_descent below.
        sample = x[i].T
        net.feed_forward(net.layers, sample, 0)
        net.back_prop1(net.layers, np.array(t[i]))
        net.back_prop2(net.layers, last - 1, net.bp1)
        net.back_prop3(net.layers, sample, last)
        net.back_prop4(net.layers, last)
    net.gradient_descent(net.layers, x, last, alpha)
    net.error_epoch.append(np.mean(net.error))

# Train on 100 randomly positioned, contiguous windows of the data.
for i in range(100):
    # randint includes both end points, so the window always fits in the data.
    idx = randint(0, cifar10_network.N - batch_size)
    batchx, batchy = allx[idx:idx + batch_size], ally[idx:idx + batch_size]
    train(batchx, batchy, neuron_nums)
    # Drop the per-sample values accumulated for this window before the next one.
    cifar10_network.refresh_network()

[matrix([[ 0.23137255,  0.16862745,  0.19607843, ...,  0.54901961,
           0.32941176,  0.28235294]]),
 matrix([[ 0.60392157,  0.49411765,  0.41176471, ...,  0.54509804,
           0.55686275,  0.56470588]]),
 matrix([[ 1.        ,  0.99215686,  0.99215686, ...,  0.3254902 ,
           0.3254902 ,  0.32941176]]),
 matrix([[ 0.10980392,  0.14509804,  0.14901961, ...,  0.10980392,
           0.14509804,  0.18039216]]),
 matrix([[ 0.66666667,  0.65882353,  0.69411765, ...,  0.32156863,
           0.30588235,  0.31372549]]),
 matrix([[ 0.62352941,  0.58823529,  0.6       , ...,  0.05490196,
           0.06666667,  0.0745098 ]]),
 matrix([[ 0.64313725,  0.41176471,  0.4627451 , ...,  0.11372549,
           0.10196078,  0.17254902]]),
 matrix([[ 0.10980392,  0.11764706,  0.12941176, ...,  0.39215686,
           0.38823529,  0.37647059]]),
 matrix([[ 0.5254902 ,  0.51372549,  0.50196078, ...,  0.53333333,
           0.5372549 ,  0.54117647]]),
 matrix([[ 0.49019608,  0.43137255,  0.4      

In [10]:
""" For each batch, pass it to the neural network """

(10000, 3072)