# Dataloading to the buffers 
Assumptions:
*   2x2 systolic array is used
*   Files from `/project-group17/python/data/parameters/float32` are used 
 


In [9]:
import numpy as np
from google.colab import files

# DATALOADING SCHEME

In [66]:
# 2x2 Systolic Array Computation ###############################################
#           a0  a1
#           -------        
#         |  A2 A3  |     t1 
#  t1 t0  |  A0 A1  |     t0
# -------   -------
#| x1 x0  | pe0 pe1
#| x2 x3  | pe2 pe3
# -------   -------
#         | om0 om1 |       --> om0 = A0x1+A2x1; om1 = A1x0+A3x1 
#         | om2 om3 |       --> om2 = A0x3+A2x2; om3 = A1x3+A3x2
#           -------
#           out0 out1
#
# where A   represents weight matrix
#       x   represents input matrix
#       pe  represents processing element
#       om  represents output matrix that represents the output from pe after
#           it finishes its execution 
#       t   represents time (t1 > t0)
#       out represents the actual output read by the output SPI slave
###############################################################################
# Full Iteration ##############################################################
# matrices:               x                   A_T

# 1st iteration:

#                     IMAGE_SIZE        MLP_OUTPUT_DIM
#                   --------------     ---------------- 
#                  |  a  a  a  a  |   |a b             | 
#     BATCH_SIZE   |  b  b  b  b  |   |a b             |    IMAGE_SIZE
#                  |              |   |a b             |
#                  |              |   |a b             |
#                   --------------     ----------------

# 2nd iteration:
#                   --------------     ---------------- 
#                  |              |   |  a b           |
#                  |  a  a  a  a  |   |  a b           |
#                  |  b  b  b  b  |   |  a b           |
#                  |              |   |  a b           |
#                   --------------     ----------------

# ...

# (MLP_OUTPUT_DIM+1) iteration:
#                   --------------     ---------------- 
#                  |  b  b  b  b  |   |b              a|
#                  |              |   |b              a|
#                  |              |   |b              a|
#                  |  a  a  a  a  |   |b              a|
#                   --------------     ----------------
###############################################################################


In [None]:
# # Define MLP architecture

# class Net(nn.Module):
#     def __init__(self):
#         super(Net,self).__init__()
#         # number of hidden nodes in each layer (512)
#         hidden_1 = 512
#         hidden_2 = 512
#         # linear layer (784 -> hidden_1)
#         self.fc1 = nn.Linear(28*28, 512)      
#         # linear layer (n_hidden -> hidden_2)
#         self.fc2 = nn.Linear(512,512)
#         # linear layer (n_hidden -> 10)
#         self.fc3 = nn.Linear(512,10)
#         # dropout layer (p=0.2)
#         # dropout prevents overfitting of data
#         self.droput = nn.Dropout(0.2)         # no dropout in the eval() mode (no impact in hw)
        
#     def forward(self,x):
#         # flatten image input
#         x = x.view(-1,28*28)
#         # add hidden layer, with relu activation function
#         x = F.relu(self.fc1(x))
#         # add dropout layer
#         x = self.droput(x)
#         # add hidden layer, with relu activation function
#         x = F.relu(self.fc2(x))
#         # add dropout layer
#         x = self.droput(x)
#         # add output layer
#         x = self.fc3(x)
#         return x
        
# # initialize the NN
# model = Net()
# print(model)

# 1st Layer Weight Matrix
input file : `fc1_weights.dat` (transposed A)

In [53]:
# Testing algorithm

# fc_test = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
# print(fc_test.shape)
# print(fc_test)
# fc_test_r = fc_test.reshape(3, -3)
# print(fc_test_r.shape)
# print(fc_test_r)
# fc_test_t = fc_test_r.transpose()
# print(fc_test_t.shape)
# print(fc_test_t)


# for i in range(fc_test_t.shape[1]):
#   print("test iter w0 {}: {}".format(i, fc_test_t[:, (i % fc_test_t.shape[1])]))
#   print("test iter w1 {}: {}".format(i, fc_test_t[:, ((i + 1) % fc_test_t.shape[1])]))

# fc_test_w0 = []
# fc_test_w1 = []
# for i in range(fc_test_t.shape[1]):
#     fc_test_w0.extend(fc_test_t[:, (i % fc_test_t.shape[1])].tolist())
#     print("iter {}: {}".format(i, fc_test_w0))
#     fc_test_w1.extend(fc_test_t[:, ((i + 1) % fc_test_t.shape[1])].tolist())
#     print("iter {}: {}".format(i, fc_test_w1))

(12,)
[ 1  2  3  4  5  6  7  8  9 10 11 12]
(3, 4)
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
(4, 3)
[[ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]]
test iter w0 0: [1 2 3 4]
test iter w1 0: [5 6 7 8]
test iter w0 1: [5 6 7 8]
test iter w1 1: [ 9 10 11 12]
test iter w0 2: [ 9 10 11 12]
test iter w1 2: [1 2 3 4]
iter 0: [1, 2, 3, 4]
iter 0: [5, 6, 7, 8]
iter 1: [1, 2, 3, 4, 5, 6, 7, 8]
iter 1: [5, 6, 7, 8, 9, 10, 11, 12]
iter 2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
iter 2: [5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4]


In [61]:
# Reading in files
fc1_w = np.genfromtxt('fc1_weights.dat', delimiter=",") 
print("shape of array", fc1_w.shape)
fc1_w_r = fc1_w.reshape(512, -1)

shape of array (401408,)


In [62]:
# Prepare matrices to be read into the buffers in the correct order 

print(fc1_w_r.shape)
# print(fc1_w_r[:3, :3])
fc1_weights = np.transpose(fc1_w_r) # "y = Ax + b"'s A form 
print(fc1_weights.shape)
# print(fc1_weights.shape[1])
print(fc1_weights[:3, :3])

b_w0 = []
b_w1 = []

for i in range(fc1_weights.shape[1]):
    b_w0.extend(fc1_weights[:, (i % fc1_weights.shape[1])].tolist())
    b_w1.extend(fc1_weights[:, ((i + 1) % fc1_weights.shape[1])].tolist())
    if i < 5:
      print("iter {}: {}".format(i, b_w0))
      print("====")
      print("iter {}: {}".format(i, b_w1))

print(b_w0[:3], b_w0[fc1_weights.shape[0]-1:fc1_weights.shape[0]+2])
print(b_w1[:3], b_w1[fc1_weights.shape[0]-1:fc1_weights.shape[0]+2])
print(len(b_w0))

(512, 784)
(784, 512)
[[-0.01254923  0.02262348 -0.00611602]
 [-0.01846748  0.00049243 -0.02410294]
 [-0.00285797 -0.008625   -0.00871783]]
iter 0: [-0.012549231, -0.018467478, -0.0028579705, -0.022266211, 0.01302453, -0.000549078, -0.02949343, -0.020997057, 0.0053291707, 0.014843682, -0.02162736, -0.015245349, 0.022060255, -0.0023352478, 0.002870113, 0.00067939505, 0.006160519, -0.034203764, 0.01816865, 0.0038733783, 0.018138623, 0.025338378, -0.002490563, -0.03078813, -0.009689957, -0.028158184, 0.030529262, 0.018607736, -0.009739106, 0.033854853, 0.020994605, 0.019825518, -0.003418225, 0.023911571, 0.026061295, -0.019330064, 0.023320295, 0.030422317, -0.0057008867, 0.03573957, 0.0077268854, 0.030756412, -0.00026399348, 0.024022728, 0.023331499, 0.01344158, 0.028957428, -0.020253137, 0.012828432, 0.006267222, -0.010752657, -0.01632118, 0.013579552, 0.011633844, 0.010566882, 0.015421455, 0.03282155, 0.00691898, 0.028157772, 0.025639633, 0.023024136, 0.014210482, -0.026009675, -0.00043

In [63]:
np.savetxt('fc1_weights_a0.dat', [b_w0], fmt='%.10f', delimiter=',') # %10.5f : total length = 10 and 5 floating points
np.savetxt('fc1_weights_a1.dat', [b_w1], fmt='%.10f', delimiter=',')

In [64]:
# download from colab
files.download('fc1_weights_a0.dat')
files.download('fc1_weights_a1.dat')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# 1st Layer Input Matrix 
val_img0, val_img1 = (2, (28 x 28))

Just need to put them directly into the corresponding rows like below:

In [65]:
#                         a0  a1
#                         -------        
#                       |  A2 A3  |     t1 
#                t1 t0  |  A0 A1  |     t0
#               -------   -------
# val_img0 --> | x1 x0  | pe0 pe1
# val_img1 --> | x2 x3  | pe2 pe3
#               -------   -------
#                       | om0 om1 |       --> om0 = A0x1+A2x1; om1 = A1x0+A3x1 
#                       | om2 om3 |       --> om2 = A0x3+A2x2; om3 = A1x3+A3x2
#                         -------
#                        out0 out1

# 1st Layer Bias Vector
input file: 'fc1_bias.dat' (512, 1)

In [None]:
# BASED ON THE ORDER THAT PEx WILL BE OUTPUT TO 

In [None]:
# Reading in files
fc1_b = np.genfromtxt('fc1_bias.dat', delimiter=",") 
print("shape of 'fc1_bias.dat': ", fc1_b.shape)

401408
401408


In [18]:
# Reading in files
fc1_b = np.genfromtxt('fc1_bias.dat', delimiter=",") 
print("shape of 'fc1_bias.dat': ", fc1_b.shape)

shape of 'fc1_bias.dat':  (512,)
