In [1]:
%matplotlib inline
import cProfile
import json
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from scipy.stats import truncnorm
import timeit

In [2]:
# Geometry of the MNIST images and size of the label space.
image_size = 28                  # images are image_size x image_size pixels
image_pixels = image_size ** 2   # length of one flattened image
no_of_different_labels = 10      # digit classes 0, 1, 2, ..., 9
data_path = "./../data/"         # prefix prepended to data file names

## Load and preprocess the test data

In [3]:
# Read the test CSV into a plain ndarray: column 0 is used as the label,
# the remaining columns as pixel values.
# NOTE(review): pd.read_csv treats the first line as a header by default --
# confirm the file really starts with a header row, otherwise one sample is lost.
test_data = pd.read_csv(data_path + "mnist_test.1k.csv", delimiter=",").values

# Rescale pixels so that (assuming raw values in 0..255) they land in
# [0.01, 1.0] and no input is exactly zero.
fac = 0.99 / 255
# np.asfarray was removed in NumPy 2.0; np.asarray with an explicit float
# dtype is its documented replacement and gives the same result on older
# NumPy releases.
test_imgs = np.asarray(test_data[:, 1:], dtype=np.float32) * fac + 0.01
# Give every sample its own (1, n_pixels) row matrix: (N, n) -> (N, 1, n).
test_imgs = test_imgs.reshape(test_imgs.shape[0], 1, test_imgs.shape[1])

# Labels stay as an (N, 1) float column so they broadcast against `lr` below.
test_labels = np.asarray(test_data[:, :1], dtype=np.float32)

lr = np.arange(no_of_different_labels)
# transform labels into one hot representation
test_labels_one_hot = (lr==test_labels).astype(np.float32)

# we don't want exact zeros or ones in the labels either:
test_labels_one_hot[test_labels_one_hot==0] = 0.001
test_labels_one_hot[test_labels_one_hot==1] = 0.999

In [4]:
# Base class
class Layer:
    """Abstract base for network layers.

    Holds the most recent ``input`` and ``output`` of the layer; both start
    out as ``None``. The two propagation methods are placeholders that
    concrete layers are expected to override.
    """

    def __init__(self):
        self.input, self.output = None, None

    def forward_propagation(self, input):
        """Compute the output Y of the layer for a given input X.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX for a given dE/dY, updating parameters if any.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

In [5]:
# inherit from base class Layer
class FCLayer(Layer):
    """Fully connected layer computing ``Y = X . W + B``.

    ``weights`` has shape ``(input_size, output_size)`` and ``bias`` has shape
    ``(1, output_size)``; both are drawn uniformly from ``[-0.5, 0.5)``.
    """

    # input_size = number of input neurons
    # output_size = number of output neurons
    def __init__(self, input_size, output_size):
        # Give the instance its input/output slots before any propagation.
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Cache ``input_data`` and return ``input_data . W + B``."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return ``dE/dX``.

        Args:
            output_error: ``dE/dY`` for the last forward pass, either a single
                row or a ``(batch, output_size)`` matrix.
            learning_rate: step size applied to the weight and bias gradients.

        Returns:
            ``dE/dX`` as a 2-D array with one row per sample.
        """
        # Promote vectors to row matrices: ``.T`` is a no-op on a 1-D array,
        # so a cached 1-D input would otherwise make the weight-gradient
        # product fail with a shape mismatch.
        inputs = np.atleast_2d(self.input)
        grad = np.atleast_2d(output_error)

        # dE/dX must use the weights from before the update below.
        input_error = np.dot(grad, self.weights.T)
        weights_error = np.dot(inputs.T, grad)
        # dE/dB is the error summed over the batch axis. For a single sample
        # this equals output_error itself, as in the per-sample update.
        bias_error = grad.sum(axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [6]:
# inherit from base class Layer
class ActivationLayer(Layer):
    """Layer without learnable parameters that applies a supplied activation.

    ``activation`` is called on the input during the forward pass and
    ``activation_prime`` (its derivative) during the backward pass.
    """

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward_propagation(self, input_data):
        """Cache the input, apply the activation and return the result."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error, learning_rate):
        """Return dE/dX for the given dE/dY.

        ``learning_rate`` is accepted for interface compatibility only; the
        layer has nothing to update.
        """
        slope = self.activation_prime(self.input)
        return slope * output_error

In [7]:
class TanhLayer(ActivationLayer):
    """Activation layer that uses the hyperbolic tangent."""

    # static; no longer used by tanh() but kept so that existing references
    # to TanhLayer.e keep working
    e = 2.71828182845904523536028747135266249775724709369995

    # Background on the exponential form this method used previously:
    #http://www.plunk.org/~hatch/rightway.php
    #https://math.stackexchange.com/questions/518758/alternative-form-for-sinhx-coshx
    @staticmethod
    def tanh(x):
        """Return tanh(x) for a scalar or array ``x``.

        The former expression ``(1 - e**(-2x)) / (1 + e**(-2x))`` overflows
        for large negative ``x``: the quotient becomes inf/inf = nan for
        arrays, and plain Python floats raise OverflowError. ``np.tanh``
        saturates cleanly at -1 and 1 instead.
        """
        return np.tanh(x)

    @staticmethod
    def tanh_prime(x):
        """Return the derivative of tanh at ``x``: ``1 - tanh(x)**2``."""
        return 1 - TanhLayer.tanh(x) ** 2

    def __init__(self):
        super().__init__(self.tanh, self.tanh_prime)
    

In [8]:
# loss function and its derivative
def mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Return the gradient of the mean squared error w.r.t. ``y_pred``."""
    count = y_true.size
    delta = y_pred - y_true
    return (2 * delta) / count

In [9]:
class Network:
    """Ordered stack of layers plus the loss function attached to it."""

    def __init__(self):
        self.layers = []
        self.loss = self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Register the loss function and its derivative."""
        self.loss, self.loss_prime = loss, loss_prime

    def _forward(self, sample):
        """Push one sample through every layer in order and return the result."""
        signal = sample
        for layer in self.layers:
            signal = layer.forward_propagation(signal)
        return signal

    def predict(self, input_data):
        """Return a list with the network output for each sample.

        ``input_data`` is indexed along its first dimension, one sample per
        index.
        """
        return [self._forward(input_data[idx]) for idx in range(len(input_data))]

    def save(self, fname):
        """Pickle this network to the file ``fname``."""
        import pickle
        with open(fname, "bw") as sink:
            pickle.dump(self, sink)

    @classmethod
    def load(cls, fname):
        """Unpickle and return the object stored in ``fname``.

        Unpickling can execute arbitrary code, so only load files you trust.
        """
        import pickle
        with open(fname, "br") as source:
            restored = pickle.load(source)
        return restored

In [10]:
# Restore the previously saved software network from its pickle file.
# The classes defined in the cells above must already exist, because pickle
# looks them up by name. Only load pickle files you trust.
net = Network.load('../network.pkl')


In [11]:
# Show the shapes of the input/output cached on the first layer of the
# loaded network (left over from its last forward pass).
first_layer = net.layers[0]
print (first_layer.input.shape)
print (first_layer.output.shape)

# NOTE(review): despite its name this is the layer at index 6, not the second
# layer in the list -- confirm that index 6 is the layer meant to be inspected.
second_layer = net.layers[6]
print (second_layer.input.shape)
print (second_layer.output.shape)

(784,)
(1, 80)
(1, 20)
(1, 10)


In [12]:
# Now we start new code
from pynq import Xlnk
from pynq import Overlay

In [13]:
class hardwareLayer(Layer):
    """Base class for layers that are evaluated by an FPGA overlay over DMA.

    Args:
        bit: path of the bitstream handed to ``Overlay``.
        input_sz: number of float32 values sent per forward pass.
        output_sz: number of float32 values received per forward pass.
    """

    def __init__(self, bit, input_sz, output_sz):
        # Give the instance the input/output slots of the base class.
        super().__init__()
        self.overlay = Overlay(bit)
        self.dma = self.overlay.axi_dma_0

        # One input and one output buffer are allocated once and reused for
        # every forward pass.
        xlnk = Xlnk()
        self.input_buffer = xlnk.cma_array(
                                shape=(input_sz,),
                                dtype=np.float32)
        self.output_buffer = xlnk.cma_array(
                                shape=(output_sz,),
                                dtype=np.float32)

    def forward_propagation(self, input):
        raise NotImplementedError

    def _run_on_hardware(self, vector):
        """Send ``vector`` through the DMA and return the result as (1, output_sz)."""
        np.copyto(self.input_buffer, vector)

        self.dma.sendchannel.transfer(self.input_buffer)
        self.dma.recvchannel.transfer(self.output_buffer)

        self.dma.sendchannel.wait()
        self.dma.recvchannel.wait()

        # Return a copy, not a view: the output buffer is overwritten by the
        # next transfer, so views collected by a caller (e.g. a list of
        # predictions) would all end up showing the most recent result.
        # output expects a [1,output_sz] matrix (not vector)
        return np.array(self.output_buffer).reshape(
                                1, len(self.output_buffer))


class firstLayer(hardwareLayer):
    """Hardware layer whose input is already a flat vector."""

    def forward_propagation(self, input):
        # use this for first layer
        self.input = input
        self.output = self._run_on_hardware(input)
        return self.output


class intermediateLayer(hardwareLayer):
    """Hardware layer fed by a preceding layer that outputs a [1, n] matrix."""

    def forward_propagation(self, input):
        # use this for other layers: take row 0 of the [1, n] matrix
        self.input = input
        self.output = self._run_on_hardware(input[0])
        return self.output

In [14]:
# Build a second network whose only layer is evaluated through the hardware
# overlay instead of NumPy.
net2 = Network()

# 784 and 10 are hardcoded in bitstream
net2.layers.append(firstLayer('bitstreams/784_10.bit', 784, 10))

# Disabled experiment: reuse the trailing software layers of `net`.
# copy all but first layer over to new network
# for layer in net.layers[4:]:
#     net2.layers.append(layer)
    
# Disabled experiment: a separate hardware layer for the final stage.
# 20 and 10 are hardcoded in bitstream
# net2.layers.append(hardwareLayer('last_layer.bit', 20, 10))

# Print the class names of corresponding layers side by side. zip() stops at
# the shorter list, so only as many pairs as net2 has layers are shown.
for lay1,lay2 in zip(net.layers, net2.layers):
    print ("Net1 Name: " + str(lay1.__class__.__name__) + "\t" +
            "Net2 Name: " + str(lay2.__class__.__name__))



Net1 Name: FCLayer	Net2 Name: firstLayer


In [15]:
# Sanity check: show how many layers the hardware-backed network contains.
print(len(net2.layers))

1


In [16]:
def evaluate(net, data, labels):
    """Count correct and wrong predictions of ``net`` over ``data``.

    Each ``data[i]`` is passed to ``net.predict``; the index of the largest
    value in the returned output is compared with ``labels[i]``.

    Returns:
        A ``(corrects, wrongs)`` tuple of counts.
    """
    total = len(data)
    hits = 0
    for idx in range(total):
        scores = np.array(net.predict(data[idx]))
        guess = np.argmax(scores)
        if guess == labels[idx]:
            hits += 1
    return hits, total - hits

In [17]:
# Time one full evaluation pass of the software network over the test set.
start = timeit.default_timer()

corrects, wrongs = evaluate(net, test_imgs, test_labels)

stop = timeit.default_timer()
# The samples are the test images, so this is test accuracy (the label
# previously read "train").
print("accuracy test: ", corrects / (corrects + wrongs))
print('Run Time: ' + str(stop - start) + ' Seconds')


accuracy train:  0.954
Run Time: 2.1707622820104007 Seconds


In [18]:
# Time one full evaluation pass of the hardware-backed network over the
# test set.
start = timeit.default_timer()

corrects, wrongs = evaluate(net2, test_imgs, test_labels)

stop = timeit.default_timer()
# The samples are the test images, so this is test accuracy (the label
# previously read "train").
print("accuracy test: ", corrects / (corrects + wrongs))
print('Run Time: ' + str(stop - start) + ' Seconds')

accuracy train:  0.955
Run Time: 1.1567477360076737 Seconds
