In [31]:
import numpy as np
from pynq import Xlnk
from pynq import Overlay
import sys
from time import sleep

# Bitstream file loaded into the programmable logic for this notebook.
bitFile = 'cnnaTest3.bit'

overlay = Overlay(bitFile)
# Handles to the two IP blocks driven below: the AXI DMA that streams the
# buffers and the CNN core that is configured through cnn.write()/cnn.read().
dma = overlay.axi_dma_0
cnn = overlay.cnn_0
# Xlnk instance used further down for cma_array()/cma_memcopy().
xlnk = Xlnk()


# Hand-maintained register offsets of the CNN core, used as the address
# argument of cnn.write()/cnn.read(). The offsets are spaced 8 bytes apart,
# from 0x14 to 0x74.
# The insertion order of this dict is also the order in which later cells
# reset the registers (they iterate over register_map).
# READY and STATUS_VAL are only ever read in this notebook; STATUS_ADD selects
# which status word STATUS_VAL returns (see loadIpParameters).
register_map = {'WEIGHT_CTRLS': 0x14,
                'CTRL_IMAGE_SIZE': 0x1c,
                'CTRL_ROW_SIZE_PKG': 0x24,
                'CTRL_WINDOW_SIZE': 0x2c,                
                'CTRL_DEPTH': 0x34,
                'CTRL_STRIDE': 0x3c,
                'CTRL_REPLAY': 0x44,
                'CTRL_CHANNEL': 0x4c,
                'CTRL_ROW_N': 0x54,
                'START': 0x5c,
                'READY' : 0x64,
                'STATUS_ADD' : 0x6c,
                'STATUS_VAL' : 0x74}

# NOTE(review): Overlay(bitFile) may already download the bitstream on
# construction - confirm whether this explicit download()/reset() pair is
# still required on the installed PYNQ version.
overlay.download()
overlay.reset()

def loadIpParameters():
    """Read the accelerator's build parameters from its status registers.

    Each parameter is fetched by writing a status index to STATUS_ADD and
    then reading STATUS_VAL. The indices are queried in this order:

        3  -> data_W
        4  -> data_P
        5  -> input DMA bandwidth
        7  -> PE_N
        8  -> pe_bw_n
        13 -> returned as the last element (unpacked as dbOutN by the caller)

    Returns:
        tuple: (dataW, dataP, dmaBW, peN, peBwN, dataOutN), exactly as
        returned by cnn.read().
    """
    selectOffset = register_map['STATUS_ADD']
    valueOffset = register_map['STATUS_VAL']

    def readStatus(statusIndex):
        # Select the status word first, then read back whatever it exposes.
        cnn.write(selectOffset, statusIndex)
        return cnn.read(valueOffset)

    statusIndices = (3, 4, 5, 7, 8, 13)
    return tuple(readStatus(statusIndex) for statusIndex in statusIndices)

# Query the accelerator's parameters from its status registers instead of
# hard-coding them.
DATA_WIDTH, DATA_P, DMA_BW, PE_N, PE_BW, dbOutN = loadIpParameters()

# Former hard-coded values, kept for reference; they agree with the values
# printed in the recorded output of this cell.
#PE_BW = 4
#PE_N = 2
#dbOutN = 3
#DMA_BW = 2
#DATA_WIDTH = 32
# allocDmaArray multiplies each buffer length by DMA_MULT right after dividing
# it by DMA_BW.
DMA_MULT = DMA_BW

# Test case geometry used by the main cell further down.
# imageSize and wSize are used squared (imageSize*imageSize, wSize*wSize),
# depth is the innermost dimension of the generated data, nW is passed to
# allocDmaArray as nWindows.
# NOTE(review): stride = 2 is only passed to doConv; allocDmaArray is called
# without a stride (default 1, the only value it accepts) - confirm the output
# buffer size is what the hardware produces for stride 2.
imageSize = 8
nImages = 1
depth = 3
wSize = 3
nW = 64
stride = 2

print("dataWidth: ", DATA_WIDTH, ', dataP: ', DATA_P, ', dmaBW: ', DMA_BW, ', PeN: ', PE_N, ', PeBW: ', PE_BW, ', data_out_n: ', dbOutN)

dataWidth:  32 , dataP:  16 , dmaBW:  2 , PeN:  2 , PeBW:  4 , data_out_n:  3


In [32]:
def convertToFractional(number,p):
    """Convert a real number (or array of numbers) to unsigned 32-bit fixed point.

    The value is scaled by 2**p, rounded to the nearest integer and stored in
    a uint32. Negative inputs are encoded in two's complement (for example
    -1.0 with p=16 becomes 0xFFFF0000).

    Args:
        number: scalar or array-like of real values.
        p: number of fractional bits.

    Returns:
        numpy.ndarray of dtype uint32 (0-d for scalar input).
    """
    scaled = np.asarray(np.round(np.asarray(number, dtype=np.float64) * (2 ** p)))
    # Casting a negative float straight to an unsigned integer is undefined
    # in C and gives platform-dependent results. Going through int64 first
    # makes the wrap to 32 bits a well-defined modular conversion.
    return scaled.astype(np.int64).astype(np.uint32)

def generatorFunc(a,b,c,d):
    """Build a 32-bit test word from four byte-sized components.

    The low 8 bits of a, b, c and d are packed as bytes 0..3 of a word, which
    is then shifted left by 16 bits and truncated to 32 bits. Because of that
    shift only a (bits 16-23) and b (bits 24-31) survive in the result; c and
    d are shifted out.
    NOTE(review): confirm that dropping c and d is intended.

    Args:
        a, b, c, d: values convertible with int(); only their low 8 bits are used.

    Returns:
        numpy.uint32: ((b & 0xff) << 24) | ((a & 0xff) << 16)
    """
    packed = 0
    for byteIndex, component in enumerate((a, b, c, d)):
        packed |= (int(component) & 0xff) << (8 * byteIndex)
    # The shifted value can be up to 48 bits wide. Mask it to 32 bits
    # explicitly: passing an out-of-range Python int to np.uint32 is
    # deprecated in NumPy 1.24+ and raises OverflowError in NumPy 2.
    return np.uint32((packed << 16) & 0xFFFFFFFF)

def generateFuncInv(data):
    """Split a word back into four bytes after dropping its low 16 bits.

    Args:
        data: integer-like value supporting ``>>``.

    Returns:
        tuple: (a, b, c, d) as Python ints, where a is the lowest byte of
        ``data >> 16`` and d the highest of the four.
    """
    word = int(data >> 16)
    return tuple((word >> bitOffset) & 0xFF for bitOffset in (0, 8, 16, 24))

def fixSize(old,align):
    """Round ``old`` up to the next multiple of ``align``.

    Values that are already aligned are returned unchanged.
    """
    remainder = old % align
    return old if remainder == 0 else old + (align - remainder)

def doCmd(registerName, value, timeout=None):
    """Write one command value to a control register using the READY handshake.

    Sequence, as implemented here:
      1. write 0 to the register and wait until READY reads non-zero,
      2. write ``(value << 1) + 1`` to the register,
      3. wait until READY reads non-zero again.
    Bit 0 of the written word is always set; presumably it acts as a
    strobe/valid flag for the IP core - TODO confirm against the HDL.

    Args:
        registerName: key of ``register_map``, case-insensitive.
        value: command value; converted with ``np.uint32`` before encoding, so
            integral floats such as 3.0 are accepted.
        timeout: optional number of seconds to wait for READY in each of the
            two wait phases. ``None`` (the default) waits forever.

    Raises:
        KeyError: if ``registerName`` is not in ``register_map``.
        TimeoutError: if READY stays 0 for longer than ``timeout`` seconds.
    """
    #TODO: find out if registermap can be used!
    # Local import so this cell only depends on names it brings in itself.
    from time import monotonic

    print('doCmd: register: ', registerName, ', value: ', value)
    registerOffset = register_map[registerName.upper()]
    readyOffset = register_map['READY']
    # Encode once, before touching the hardware, so an unconvertible value
    # fails here instead of after the register has already been cleared.
    commandWord = int(np.left_shift(np.uint32(value),np.uint(1)) + 1)

    def waitUntilReady():
        # Busy-wait on READY; with a timeout a stuck core raises instead of
        # hanging the kernel.
        deadline = None if timeout is None else monotonic() + timeout
        while cnn.read(readyOffset) == 0:
            if deadline is not None and monotonic() > deadline:
                raise TimeoutError(
                    'READY not asserted within %s s while writing %s'
                    % (timeout, registerName))

    cnn.write(registerOffset,int(0))
    sleep(0.001)
    waitUntilReady()

    print('value written: ', hex(commandWord))
    cnn.write(registerOffset,commandWord)
    sleep(0.001)
    waitUntilReady()


In [33]:
def realignDmaBuffer(data,dmaBuffer,depth,newDepth,dmaBW):
    """Copy ``data`` into ``dmaBuffer``, padding every group to ``newDepth`` words.

    The destination is treated as consecutive groups of ``newDepth`` slots.
    The first ``depth`` slots of each group receive the next values from
    ``data`` (converted to uint32); the remaining slots are set to 0.
    ``dmaBuffer`` is modified in place.

    Args:
        data: source sequence, read front to back.
        dmaBuffer: destination sequence; every element is overwritten.
        depth: number of real values per group.
        newDepth: padded group length.
        dmaBW: unused; kept for interface compatibility.
    """
    print('length of data: ', len(data), ', length of dma: ', len(dmaBuffer))
    paddingWord = np.uint32(0)
    sourceIndex = 0
    for targetIndex in range(len(dmaBuffer)):
        if (targetIndex % newDepth) >= depth:
            # Slot beyond the original depth: alignment padding only.
            dmaBuffer[targetIndex] = paddingWord
            continue
        dmaBuffer[targetIndex] = np.uint32(data[sourceIndex])
        sourceIndex += 1

In [34]:
def allocDmaArray(imageSize, wSize, depth, nImages, nWindows, stride = 1):
    """Size and allocate the buffers for one convolution run.

    The depth is padded up to a multiple of PE_BW and the window area up to a
    multiple of (PE_BW*dbOutN)/paddedDepth. The input and weight staging
    buffers are plain zero-filled NumPy arrays; only the output buffer is
    allocated through ``xlnk.cma_array``.

    Args:
        imageSize: edge length of the square input image.
        wSize: edge length of the square window.
        depth: unpadded depth.
        nImages: number of input images.
        nWindows: number of windows (weight sets).
        stride: only 1 is supported.

    Returns:
        tuple: (dmaY, dmaXbuffer, dmaWbuffer, newWSize, newDepth).

    Raises:
        AssertionError: for an unsupported stride or unaligned sizes.
    """
    assert stride == 1, "Unsupported Stride"

    paddedDepth = int(fixSize(depth, PE_BW))
    assert PE_BW % paddedDepth == 0, "PE_BW unaligned with depth"

    # True division on purpose: the alignment may be a float, as in the original.
    windowAlignment = (PE_BW*dbOutN)/paddedDepth
    paddedWSize = int(np.sqrt(fixSize(wSize*wSize, windowAlignment)))

    imageWords = imageSize*imageSize*paddedDepth
    windowWords = paddedWSize*paddedWSize*paddedDepth
    assert imageWords % DMA_BW == 0, "unalligned new depth"
    assert windowWords % DMA_BW == 0, "unaligned new parameters"

    # Input (X) staging buffer.
    xLength = DMA_MULT*int((imageWords * nImages) / DMA_BW)
    xStaging = np.zeros(shape=xLength, dtype=np.uint32)

    # Weight (W) staging buffer.
    wLength = DMA_MULT*int((windowWords * nWindows) / DMA_BW)
    wStaging = np.zeros(shape=wLength, dtype=np.uint32)

    # Output (Y) buffer: the row shrinks by 2*floor(wSize/2), and the output
    # depth is the window count padded to a multiple of PE_N.
    outputRow = int(imageSize-int(np.floor(wSize/2)*2))
    outputDepth = fixSize(nWindows, PE_N)
    yLength = DMA_MULT*int((outputRow*outputRow*outputDepth)/DMA_BW)
    yBuffer = xlnk.cma_array(shape=(yLength,),dtype=np.uint32)

    return yBuffer, xStaging, wStaging, paddedWSize, paddedDepth

In [35]:
def doConv(dmaX, dmaW, dmaY, window, depth, rowSize, stride, xN, wN):
    """Configure the accelerator for one convolution and stream the buffers through the DMA.

    All control values are written with doCmd(). Afterwards the weight buffer
    is sent and waited for, and only then the input buffer is sent while the
    output buffer is received.

    Args:
        dmaX: input buffer handed to dma.sendchannel.transfer().
        dmaW: weight buffer handed to dma.sendchannel.transfer().
        dmaY: output buffer handed to dma.recvchannel.transfer().
        window: window edge length (already padded by the caller).
        depth: depth (already padded by the caller); must be a multiple of DMA_BW.
        rowSize: row length of the square input.
        stride: 1 or an even number.
        xN: number of input images.
        wN: number of windows.

    Returns:
        None. The function returns after both DMA channels have been waited on.

    Raises:
        AssertionError: if depth, window size or stride violate the checks below.
    """
    assert(depth % DMA_BW == 0),"unaligned!"
    # True division: depthRaw and the values derived from it are floats.
    # doCmd converts every value with np.uint32 before it is written.
    depthRaw = depth / PE_BW;
    assert((window * window * depth) % (PE_BW * dbOutN) == 0),"unaligned"
    # Value for ctrl_row_N: window volume divided by PE_BW*dbOutN.
    wbCtrlRowN = (window * window * depth) / (PE_BW * dbOutN)
    # Value for ctrl_replay: window count divided by PE_N, rounded up.
    dbReplay = np.ceil(wN / PE_N)
    assert(stride % 2 == 0 or stride == 1),'invalid stride'
    # Output row length: the input row minus 2*floor(window/2), divided by the stride.
    rowSizeY = (rowSize - (np.floor(window / 2) * 2))/stride
    # Local value for ctrl_image_size, in units of depthRaw (not the raw pixel count).
    imageSize = depthRaw*rowSize*rowSize
    # setup
    doCmd('ctrl_depth', depthRaw)
    # The register receives stride-1, not the stride itself.
    doCmd('ctrl_stride',stride-1)
    doCmd('ctrl_window_size', window)
    doCmd('ctrl_row_size_pkg', rowSize)
    #setup weight Buffer
    doCmd('ctrl_row_N', wbCtrlRowN);
    # weight_ctrls is written twice: first 0, then the output pixel count over all images.
    # NOTE(review): the meaning of the initial 0 write is not visible here - confirm against the IP core.
    doCmd('weight_ctrls', 0);
    doCmd('weight_ctrls', xN * rowSizeY * rowSizeY)
    # NOTE(review): ctrl_channel is set to 0 here and to 1 after the data-buffer
    # setup; presumably it selects the stream target - confirm.
    doCmd('ctrl_channel', 0);
    #setup db
    doCmd('ctrl_replay', dbReplay);
    doCmd('ctrl_image_size', imageSize);
    doCmd('ctrl_channel', 1);
    

    # Phase 1: send the weights and wait until that transfer has finished.
    dma.sendchannel.start()
    dma.sendchannel.transfer(dmaW)
    dma.sendchannel.wait()
    # Phase 2: send the input data and receive the result at the same time.
    dma.sendchannel.start()
    dma.recvchannel.start()
    dma.sendchannel.transfer(dmaX)
    dma.recvchannel.transfer(dmaY)

    #dma.sendchannel.start()
    # Block until both directions are done.
    dma.sendchannel.wait()
    dma.recvchannel.wait()

In [38]:

#here begins the real main
# Re-download the bitstream and reset the overlay before the run.
overlay.download()
overlay.reset()

#reset registers
# READY and STATUS_VAL are only ever read in this notebook, so they are skipped.
# Strings must be compared by value: the former `is not 'READY'` identity test
# only worked through string interning and is a SyntaxWarning on Python 3.8+.
for register in register_map:
    if register not in ('READY', 'STATUS_VAL'):
        print('resetting: ', register)
        cnn.write(register_map[register],0)

# NOTE(review): 0xee is an undocumented magic value - confirm against the IP core.
cnn.write(register_map['START'],0xee)

stridedImageSize = int(np.ceil(imageSize/stride))
print(stridedImageSize)

# Generate the input test pattern: one word per (image, row, column, channel).
# NOTE(review): only int(imageSize/stride) rows are generated per image, so
# with stride > 1 the tail of xData stays zero - confirm this is intended.
xData = np.zeros(shape=((imageSize*imageSize*depth*nImages),),dtype=np.uint32)
idx = 0
for i in range(nImages):
    for a in range(int(imageSize/stride)):
        for b in range(int(imageSize)):
            for c in range(depth):
                xData[idx] = generatorFunc(a,b,c,i)
                idx += 1

# All weights are 1. The commented-out alternative below sets one weight per
# window to 1 and the rest to 0.
wData = np.ones(shape=(((wSize*wSize*depth*nW)),),dtype=np.uint32)
#for i in range(len(wData)):
#    if int(i % (wSize * wSize * depth)) == int(depth * (np.floor((wSize * wSize) / 2))):
#        wData[i] = 1
#    else:
#        wData[i] = 0

np.set_printoptions(formatter={'int':hex})

print(cnn.read(register_map['READY']))


# Allocate the buffers. No stride is passed, so allocDmaArray sizes them for stride 1.
dmaY, dmaXbuffer, dmaWbuffer, newWSize, newDepth = allocDmaArray(imageSize, wSize, depth, nImages, nW)

# Pad the weights and the input from depth to newDepth inside the staging buffers.
realignDmaBuffer(wData, dmaWbuffer, depth, newDepth, DMA_BW)

realignDmaBuffer(xData, dmaXbuffer, depth, newDepth, DMA_BW)

# Copy the staging buffers into contiguous memory that the DMA can read.
dmaW = xlnk.cma_array(shape=(dmaWbuffer.shape),dtype=np.uint32)
dmaX = xlnk.cma_array(shape=(dmaXbuffer.shape),dtype=np.uint32)


xlnk.cma_memcopy(dmaX,dmaXbuffer,len(dmaXbuffer)*dmaXbuffer.itemsize)
xlnk.cma_memcopy(dmaW,dmaWbuffer,len(dmaWbuffer)*dmaWbuffer.itemsize)
doConv(dmaX, dmaW, dmaY, newWSize, newDepth, imageSize, stride, nImages, nW)

#print(dmaX, dmaW)


resetting:  WEIGHT_CTRLS
resetting:  CTRL_IMAGE_SIZE
resetting:  CTRL_ROW_SIZE_PKG
resetting:  CTRL_WINDOW_SIZE
resetting:  CTRL_DEPTH
resetting:  CTRL_STRIDE
resetting:  CTRL_REPLAY
resetting:  CTRL_CHANNEL
resetting:  CTRL_ROW_N
resetting:  START
resetting:  STATUS_ADD
4
1
length of data:  1728 , length of dma:  2304
length of data:  192 , length of dma:  256
doCmd: register:  ctrl_depth , value:  1.0
value written:  0x3
doCmd: register:  ctrl_stride , value:  1
value written:  0x3
doCmd: register:  ctrl_window_size , value:  3
value written:  0x7
doCmd: register:  ctrl_row_size_pkg , value:  8
value written:  0x11
doCmd: register:  ctrl_row_N , value:  3.0
value written:  0x7
doCmd: register:  weight_ctrls , value:  0
value written:  0x1
doCmd: register:  weight_ctrls , value:  9.0
value written:  0x13
doCmd: register:  ctrl_channel , value:  0
value written:  0x1
doCmd: register:  ctrl_replay , value:  32.0
value written:  0x41
doCmd: register:  ctrl_image_size , value:  64.0
value

In [39]:
# Print every word of the output buffer in hex, one word per line.
for outputWord in dmaY:
    print(hex(outputWord))#,end=', ')

0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x1b1b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x511b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b
0x871b

0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0
0x0


In [None]:
# Read the READY register once; doCmd() spins for as long as this reads 0.
cnn.read(register_map['READY'])

In [9]:
# Bytes per element of the uint32 staging buffer; this factor is used for the
# byte counts passed to xlnk.cma_memcopy().
dmaXbuffer.itemsize

4

In [72]:
import time

In [92]:
# Debug cell: configure only the weight path, push one weight buffer through
# the DMA and then dump all status words.
overlay.download()
overlay.reset()

# READY and STATUS_VAL are only ever read in this notebook, so they are skipped.
# Strings must be compared by value: the former `is not 'READY'` identity test
# only worked through string interning and is a SyntaxWarning on Python 3.8+.
for register in register_map:
    if register not in ('READY', 'STATUS_VAL'):
        print('resetting: ', register)
        cnn.write(register_map[register],0)

# NOTE(review): 0xee is an undocumented magic value - confirm against the IP core.
cnn.write(register_map['START'],0xee)

doCmd('ctrl_row_N', int(3));
doCmd('weight_ctrls', int(0));
#doCmd('weight_ctrls', int(36))

#cnn.write(register_map['CTRL_CHANNEL'],int(0))
#time.sleep(0.001)
#cnn.write(register_map['CTRL_CHANNEL'],int(1))
#time.sleep(0.001)
#cnn.write(register_map['CTRL_CHANNEL'],int(0))

doCmd('ctrl_channel', int(0));

dma.sendchannel.start()

# NOTE(review): this buffer is uint64 with 284 elements, whereas the main cell
# uses uint32 buffers - confirm the element width the DMA stream expects.
dmaW = xlnk.cma_array(shape=(284,),dtype=np.uint64)
dma.sendchannel.transfer(dmaW)
dma.sendchannel.wait()

# Dump status words 0..17: select each index through STATUS_ADD, then read STATUS_VAL.
for i in range(18):
    cnn.write(register_map['STATUS_ADD'],i)
    print('address:',i, ', value:', cnn.read(register_map['STATUS_VAL']))

resetting:  WEIGHT_CTRLS
resetting:  CTRL_IMAGE_SIZE
resetting:  CTRL_ROW_SIZE_PKG
resetting:  CTRL_WINDOW_SIZE
resetting:  CTRL_DEPTH
resetting:  CTRL_STRIDE
resetting:  CTRL_REPLAY
resetting:  CTRL_CHANNEL
resetting:  CTRL_ROW_N
resetting:  START
resetting:  STATUS_ADD
doCmd: register:  ctrl_row_N , value:  3
value written:  0x7
doCmd: register:  weight_ctrls , value:  0
value written:  0x1
doCmd: register:  ctrl_channel , value:  0
value written:  0x1
address: 0 , value: 4294967295
address: 1 , value: 0
address: 2 , value: 284
address: 3 , value: 32
address: 4 , value: 16
address: 5 , value: 2
address: 6 , value: 2
address: 7 , value: 2
address: 8 , value: 4
address: 9 , value: 1024
address: 10 , value: 1024
address: 11 , value: 3
address: 12 , value: 1024
address: 13 , value: 3
address: 14 , value: 142
address: 15 , value: 142
address: 16 , value: 0
address: 17 , value: 4294967295


In [19]:
# Dump status words 0..17: select each index through STATUS_ADD, then read
# back the value exposed on STATUS_VAL.
for statusIndex in range(18):
    cnn.write(register_map['STATUS_ADD'],statusIndex)
    statusValue = cnn.read(register_map['STATUS_VAL'])
    print('address:',statusIndex, ', value:', statusValue)

address: 0 , value: 4294967295
address: 1 , value: 49
address: 2 , value: 1207
address: 3 , value: 32
address: 4 , value: 16
address: 5 , value: 2
address: 6 , value: 2
address: 7 , value: 2
address: 8 , value: 4
address: 9 , value: 1024
address: 10 , value: 1024
address: 11 , value: 3
address: 12 , value: 1024
address: 13 , value: 3
address: 14 , value: 601
address: 15 , value: 576
address: 16 , value: 25
address: 17 , value: 1


In [66]:
# Inspect the shape of the current weight DMA buffer.
dmaW.shape

In [47]:
# Format integer array elements with hex() whenever NumPy prints an array.
np.set_printoptions(formatter={'int':hex})


<attribute 'dtype' of 'numpy.generic' objects>

In [49]:
# Scratch check: allocate a one-element uint64 contiguous buffer.
# NOTE(review): the recorded output of this cell is a TypeError - confirm
# whether cma_array supports this call on the installed PYNQ version.
xlnk.cma_array(shape=(1,),dtype=np.uint64)

TypeError: Required argument 'object' (pos 1) not found