In [14]:
from pynq import DefaultIP
import numpy as np

class MatMulDriver(DefaultIP):
    """PYNQ driver for the custom ``matmul`` IP core.

    PYNQ instantiates this class for every IP whose VLNV matches ``bindto``.
    All register accesses go through ``self.read`` / ``self.write`` so the
    driver works for whichever instance it is bound to, independent of any
    notebook-level variable names.

    Register offsets used by this driver:
        0x00  start        write 1 to trigger a computation
        0x04  done         reads non-zero once the computation finished
        0x14  FSM state    current state of the core's state machine
        0x24  sanity word  expected to read back 0xdeadbeef
    """

    bindto = ['xilinx.com:user:matmul:1.0']

    # Named register offsets (previously magic numbers in every method).
    REG_START = 0x0
    REG_DONE = 0x4
    REG_STATE = 0x14
    REG_SANITY = 0x24

    def __init__(self, description):
        super().__init__(description=description)

    def reset(self):
        """Write 0 to both the "start" and the "done" register."""
        self.write(self.REG_START, 0)
        self.write(self.REG_DONE, 0)

    def start(self):
        """Trigger the matrix multiplication by writing 1 to "start"."""
        self.write(self.REG_START, 1)

    def is_done(self):
        """Return the raw value of the "done" register (truthy when done)."""
        return self.read(self.REG_DONE)

    def clear_done(self):
        """Acknowledge completion so the core can be started again."""
        # Write 0 to the "start" register to clear it ...
        self.write(self.REG_START, 0)
        # ... then write 1 to the "done" register to clear it.
        self.write(self.REG_DONE, 1)

    def current_state(self):
        """Return the value of the FSM state register."""
        return self.read(self.REG_STATE)

    def check_sanity(self):
        """Return the sanity register as a hex string (expect '0xdeadbeef')."""
        return hex(self.read(self.REG_SANITY))

class BramDriver(DefaultIP):
    """PYNQ driver for the AXI BRAM controllers holding the 4x4 operands.

    Every AXI BRAM controller in the overlay is bound to this class, so each
    instance offers ``write_a``, ``write_b`` and ``read_c``; the caller picks
    the method matching the role of the BRAM it is talking to. All accesses
    go through ``self`` rather than notebook-level globals.

    Each matrix occupies four 32-bit words at byte offsets 0, 4, 8 and 12,
    with one 8-bit element per byte lane.
    """

    bindto = ['xilinx.com:ip:axi_bram_ctrl:4.1']

    def __init__(self, description):
        super().__init__(description=description)

    @staticmethod
    def _pack_bytes(values):
        """Pack up to four 8-bit values into one word, first value in bits 7:0.

        Elements are converted to Python ``int`` before shifting. Shifting
        ``np.uint8`` scalars directly depends on NumPy's promotion rules and
        overflows when the result stays 8 bits wide. Each element is masked
        to its low 8 bits so it cannot spill into a neighbouring byte lane.
        """
        word = 0
        for lane, value in enumerate(values):
            word |= (int(value) & 0xFF) << (8 * lane)
        return word

    def write_a(self, a):
        """Store matrix ``a`` column-wise: word ``j`` holds column ``j``.

        Within a word, row 0 sits in the least significant byte and row 3 in
        the most significant byte.
        """
        for col in range(4):
            self.write(4 * col, self._pack_bytes(a[row, col] for row in range(4)))

    def write_b(self, b):
        """Store matrix ``b`` row-wise: word ``i`` holds row ``i``.

        Within a word, column 0 sits in the least significant byte and
        column 3 in the most significant byte.
        """
        for row in range(4):
            self.write(4 * row, self._pack_bytes(b[row, col] for col in range(4)))

    def read_c(self):
        """Read the 4x4 result and return it as a ``uint8`` array.

        Word ``i`` holds row ``i``; column ``j`` is byte lane ``j`` of it.
        """
        c = np.empty((4, 4), dtype=np.uint8)
        for row in range(4):
            word = self.read(4 * row)
            for col in range(4):
                c[row, col] = (word >> (8 * col)) & 0xFF
        return c

In [15]:
# Load the matmul bitstream and grab handles to the IP cores it contains.
# NOTE(review): the bitstream path is absolute and user-specific; consider
# making it configurable so the notebook runs on other boards/accounts.
from pynq import Overlay
overlay = Overlay('/home/aman/overlays/design_1.bit')
# These handles are notebook-level globals; do_matmul() reads them by name.
matmul = overlay.matmul_0
bram_a = overlay.axi_bram_ctrl_a
bram_b = overlay.axi_bram_ctrl_b
bram_c = overlay.axi_bram_ctrl_c

In [16]:
def do_matmul(a, b, timeout=None):
    """Multiply two 4x4 matrices on the FPGA overlay and return the result.

    Uses the notebook-level handles ``matmul``, ``bram_a``, ``bram_b`` and
    ``bram_c``, which must have been bound to the loaded overlay beforehand.

    Parameters
    ----------
    a, b : 4x4 array-like of 8-bit values
        Operands; ``a`` is written to BRAM A and ``b`` to BRAM B.
    timeout : float or None, optional
        Maximum number of seconds to wait for the core to report completion.
        ``None`` (the default) waits indefinitely, as the original code did.

    Returns
    -------
    Whatever ``bram_c.read_c()`` returns (the 4x4 result matrix).

    Raises
    ------
    TimeoutError
        If ``timeout`` is given and the done flag does not rise in time.
    """
    import time  # local import keeps this cell self-contained

    matmul.reset()
    bram_a.write_a(a)
    bram_b.write_b(b)
    matmul.start()

    # Poll the done flag; bound the wait so a stuck core cannot hang the kernel.
    deadline = None if timeout is None else time.monotonic() + timeout
    while not matmul.is_done():
        if deadline is not None and time.monotonic() > deadline:
            raise TimeoutError(
                "matmul core did not signal done within {} s".format(timeout))

    matmul.clear_done()
    return bram_c.read_c()

In [25]:
# Draw two random 4x4 uint8 operand matrices with entries in 0..4.
# With entries <= 4 every dot product is at most 4*4*4 = 64, so the result
# fits in 8 bits. NOTE(review): no seed is set, so values differ on each run.
a = np.random.randint(low=0, high=5,size=(4,4), dtype=np.uint8)
print("a=",a)
b = np.random.randint(low=0, high=5,size=(4,4), dtype=np.uint8)
print("b=",b)

a= [[4 4 3 2]
 [2 2 0 0]
 [4 1 2 4]
 [2 0 2 2]]
b= [[2 4 2 0]
 [4 1 3 1]
 [3 1 2 1]
 [4 2 2 4]]


In [26]:
# Run the product on the FPGA overlay through do_matmul() and show it.
print("Result from overlay running on fpga:")
c_fpga = do_matmul(a,b)
print(c_fpga)

Result from overlay running on fpga:
[[41 27 30 15]
 [12 10 10  2]
 [34 27 23 19]
 [18 14 12 10]]


In [27]:
# Compute the same product on the CPU with NumPy as the reference to compare
# against the FPGA result.
print("Result from numpy running on cpu:")
c_cpu = np.matmul(a,b)
print("c=", c_cpu)

Result from numpy running on cpu:
c= [[41 27 30 15]
 [12 10 10  2]
 [34 27 23 19]
 [18 14 12 10]]


In [None]:
#That's it

In [None]:
# Scratch: (re)bind the matmul IP handle from the currently loaded overlay.
matmul = overlay.matmul_0

In [None]:
# Scratch: (re)bind the three BRAM controller handles from the current overlay.
bram_a = overlay.axi_bram_ctrl_a
bram_b = overlay.axi_bram_ctrl_b
bram_c = overlay.axi_bram_ctrl_c

In [None]:
# Manually initialise the BRAMs with fixed test patterns (bypasses the
# write_a/write_b helpers and writes raw 32-bit words).
for i in range(4):
    #bram_a.write(i*4,i+1000) 
    #bram_b.write(i*4,i+2000)
    # Pre-fill the first four words of BRAM C with 3000..3003.
    bram_c.write(i*4,i+3000)
    
# Four words of operand data for BRAM A (one byte per matrix element).
bram_a.write(0, int('0x09050308',16))
bram_a.write(4, int('0x01020304',16))
bram_a.write(8, int('0x00010306',16))
bram_a.write(12, int('0x05060708',16))
# Zero the word at byte offset 8188.
# NOTE(review): presumably the last word of the BRAM - confirm its size.
bram_a.write(8188,int('0x00000000',16))

# Four words of operand data for BRAM B, then zero the word at offset 8188.
bram_b.write(0, int('0x00030101',16))
bram_b.write(4, int('0x03040100',16))
bram_b.write(8, int('0x01030503',16))
bram_b.write(12, int('0x02030609',16))
bram_b.write(8188,int('0x00000000',16))

In [13]:
# Test initialization: print the first four 32-bit words of each BRAM in hex.
for label, bram in (("a=", bram_a), ("b=", bram_b), ("c=", bram_c)):
    print(label)
    for i in range(4):
        print(hex(bram.read(i*4)))

a=
0x2040302
0x3020301
0x1000102
0x1040101
b=
0x4040201
0x40104
0x3020003
0x3010403
c=
0x1111090f
0x0
0x0
0x0


In [None]:
# Scratch: write 0 directly to the "start" register (offset 0x0),
# bypassing the driver methods.
matmul.write(0x0,0) 

In [12]:
# Scratch: trigger a computation through the driver's start() method.
matmul.start()

In [None]:
# Scratch: write 0 directly to the register at offset 0x4 (the "done" /
# clear-done register), bypassing the driver methods.
matmul.write(0x4,0)

In [11]:
# Read registers from the IP to check the initial/reset state.
# Offsets are read in ascending order; names follow the register map.
register_map = (
    (0x0,  "start"),
    (0x4,  "done"),
    (0x8,  "addr_a"),
    (0xc,  "addr_b"),
    (0x10, "addr_c"),
    (0x14, "state of fsm"),
    (0x18, "rdata_a"),
    (0x1C, "rdata_b"),
    (0x20, "rdata_c"),
)
for offset, _name in register_map:
    print(matmul.read(offset))
print(hex(matmul.read(0x24))) #should be deadbeef

0
0
8188
8188
8188
0
0
0
0
0xdeadbeef


In [None]:
# Scratch: acknowledge completion via the driver (writes 0 to the start
# register at 0x0, then 1 to the done register at 0x4).
matmul.clear_done()

In [None]:
# Read registers from the IP again (state after clear_done).
# Order: start, done, addr_a, addr_b, addr_c, state of fsm,
#        rdata_a, rdata_b, rdata_c.
for offset in (0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1C, 0x20):
    print(matmul.read(offset))
print(hex(matmul.read(0x24))) #should be deadbeef

In [None]:
# Scratch: read the "done" register through the driver; the value is shown
# as the cell output.
matmul.is_done()

In [None]:
# Read registers from the IP once more (state after polling is_done).
# Order: start, done, addr_a, addr_b, addr_c, state of fsm,
#        rdata_a, rdata_b, rdata_c.
plain_offsets = [0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1C, 0x20]
for reg_offset in plain_offsets:
    print(matmul.read(reg_offset))
print(hex(matmul.read(0x24))) #should be deadbeef

In [None]:
# Read back the BRAMs: first four 32-bit words of A, B and C, printed in hex.
bram_views = [("a=", bram_a), ("b=", bram_b), ("c=", bram_c)]
for heading, memory in bram_views:
    print(heading)
    for i in range(4):
        print(hex(memory.read(i*4)))

In [None]:
# Scratch: display the overlay's ip_dict to inspect which IP blocks (and
# names) PYNQ found in the loaded design.
overlay.ip_dict

In [None]:
# Scratch: read the sanity register, then reset the core.
# NOTE(review): the check_sanity() result is discarded - only the last
# expression of a cell is displayed, and that is reset().
matmul.check_sanity()
matmul.reset()

In [None]:
# Scratch: trigger the core again via the driver's start() method.
matmul.start()

In [None]:
# Leftover from an adder-overlay experiment: write 2323 to offset 0x8.
# NOTE(review): `add_ip` is not defined in any visible cell; this raises
# NameError on a fresh kernel.
add_ip.write(0x8,2323)

In [None]:
# Leftover from an adder-overlay experiment: read back offset 0x8.
# NOTE(review): `add_ip` is not defined in any visible cell.
add_ip.read(0x8)

In [None]:
# Leftover from an adder-overlay experiment.
# NOTE(review): `AddDriver` is not defined in any visible cell, and this
# rebinds `a`, clobbering the operand matrix used by the matmul cells.
a = AddDriver(overlay.scalar_add.description)
#overlay.scalar_add.add(4,5)

In [None]:
# Leftover: load a different (adder) bitstream and rebind `overlay` to it.
# NOTE(review): the matmul/bram handles bound earlier came from the previous
# overlay object; presumably they are stale after this - confirm.
overlay = Overlay('/home/aman/overlays/adder.bit')

In [None]:
# Leftover interactive help query (IPython `?` syntax) on the overlay object.
overlay?

In [None]:
# Leftover from the adder experiment: call add(4, 5) on the overlay's
# scalar_add IP (requires the adder overlay to be loaded).
overlay.scalar_add.add(4,5)

In [None]:
# Scratch: allocate a 4x4 uint8 array; np.ndarray() leaves the contents
# uninitialised.
n = np.ndarray([4,4],np.uint8)

In [None]:
# Scratch: explore the indexing used by the BRAM packing code -
# a column of `a` (a[:,0]), single elements, and a row of `b` (b[0,:]).
print(a)
print(a[:,0])
print(a[0,0])
print(a[1,0])
print(b)
print(b[0,:])

In [None]:
# Scratch: pack column 0 of `a` into one 32-bit word (row 0 in the low byte),
# the same expression write_a uses for the word at offset 0.
int((a[3,0]<<24) + (a[2,0]<<16) + (a[1,0]<<8) + (a[0,0]))

In [None]:
# Scratch: replace the random operands with fixed test matrices and print
# the CPU reference product.
# NOTE(review): np.array() on Python ints uses the default integer dtype, not
# uint8, and several products here exceed 255, so an 8-bit result would wrap.
arr = [[8,4,6,8],[3,3,3,7],[5,2,1,6],[9,1,0,5]]
a = np.array(arr)
print(a)
arr = [[1,1,3,0],[0,1,4,3],[3,5,3,1],[9,6,3,2]]
b = np.array(arr)
print(b)
print(np.matmul(a,b))

In [None]:
# Scratch: write operand `a` into BRAM A through the driver helper.
bram_a.write_a(a)

In [None]:
# Scratch: write operand `b` into BRAM B through the driver helper.
bram_b.write_b(b)

In [None]:
# Scratch: read the 4x4 result matrix back from BRAM C and display it.
bram_c.read_c()

In [None]:
# Scratch: run the full FPGA multiply on the current `a` and `b`, then
# display the result as the cell output.
c = do_matmul(a,b)
c

In [None]:
# Scratch: read word 0 of BRAM C and extract its most significant byte,
# the same mask-and-shift read_c uses for column 3.
val = bram_c.read(0)
print(val)
print(hex(val))
print(hex((val & 0xff000000) >> 24))

In [None]:
# Scratch: CPU reference product of the current `a` and `b` via np.dot.
res = np.dot(a,b)

In [None]:
# Scratch: display the np.dot result computed in the previous cell.
res

In [None]:
# Scratch: CPU reference product via np.matmul, shown as the cell output.
np.matmul(a,b)

In [None]:
        #for x in np.nditer(a, order='F'):
        #    print(x)
        #extract each column
        #for x in np.nditer(a[:,0]):
        
        #for x in np.nditer(b, order='C'):
        #    print(x)