In [1]:
import time

import numpy as np
from pynq import Overlay
from pynq import MMIO

# Program bitstream to FPGA
# NOTE: the overlay is loaded before any MMIO window is created.
overlay = Overlay('MarginSampling16_10Classes.bit')

# The base addresses / ranges below are hard-coded physical addresses.
# NOTE(review): they presumably mirror the address map of the block design
# that produced the bitstream - confirm whenever the design is rebuilt.

# Access to memory map of the AXI GPIO 0
# (used further down to start the core and to poll for completion)
GPIO0_ADDR_BASE = 0x41200000
GPIO0_ADDR_RANGE = 0x10000  # 64 KiB window
gpio0_obj = MMIO(GPIO0_ADDR_BASE, GPIO0_ADDR_RANGE)

# Access to memory map of the BRAM 0
# (written by this notebook with the flattened test-bench data)
BRAM0_ADDR_BASE = 0x40000000
BRAM0_ADDR_RANGE = 0x8000  # 32 KiB window
bram0_obj = MMIO(BRAM0_ADDR_BASE, BRAM0_ADDR_RANGE)

# Access to memory map of the BRAM 1
# (read back by this notebook to obtain the selected sample indices)
BRAM1_ADDR_BASE = 0x42000000
BRAM1_ADDR_RANGE = 0x2000  # 8 KiB window
bram1_obj = MMIO(BRAM1_ADDR_BASE, BRAM1_ADDR_RANGE)

### Functions

In [2]:
def margin_sw(probs, query_batch):
    '''
    Software reference for margin sampling.

    The margin of a sample is the difference between its largest and its
    second-largest score. Samples with the smallest margins are selected.

    probs       -> 2-D array-like, one row of scores per sample
                   (each row needs at least two entries)
    query_batch -> number of smallest-margin samples to select

    Returns a 4-tuple:
        margins         -> margin of every sample, in input order
        margins_batch   -> margins of the selected samples
        query_idx       -> all sample indices ordered by ascending margin
        query_idx_batch -> the first `query_batch` entries of query_idx
    '''
    # Sort every row once; the two largest scores end up in the last columns.
    sorted_probs = np.sort(np.asarray(probs), axis=-1)

    if len(sorted_probs) == 0:
        # No samples at all: keep returning empty arrays.
        margins = np.array([])
    else:
        # Rows are ascending, so this difference is never negative and
        # cannot wrap around for unsigned dtypes.
        margins = sorted_probs[:, -1] - sorted_probs[:, -2]

    # A single argsort serves both the full ranking and the selected batch.
    query_idx = np.argsort(margins)
    query_idx_batch = query_idx[:query_batch]
    margins_batch = margins[query_idx_batch]

    return margins, margins_batch, query_idx, query_idx_batch

def margin_hw(probs, query_batch, n_memories):

    '''
    this function is used to get the indexes of the smallest margins using N memories
    where an incoming margin is stored if it is smaller than the max margin, the inputs are probabilities
    N -> number of memories

    probs       -> iterable of rows of scores, one row per sample
                   (each row needs at least two entries)
    query_batch -> total number of entries stored across all memories
    n_memories  -> number of memories; samples are dealt to them round-robin

    The first `query_batch` samples fill the memories. Every later sample
    replaces the largest-margin entry of the memory it is dealt to, but only
    when its own margin is strictly smaller. A memory that received no entry
    during the fill phase (query_batch < n_memories) never stores anything.

    Returns (margins_batch, query_idx): the margins and sample indices held in
    the memories, listed memory by memory in storage order.
    '''

    mems = [[] for _ in range(n_memories)]
    # Largest margin currently held by each memory; None while it is empty.
    maxs = [None] * n_memories
    mem_sel = 0
    margins = []
    for idx, prob in enumerate(probs):
        ordered = np.sort(prob)  # sort once, reuse for both top values
        margin = ordered[-1] - ordered[-2]
        margins.append(margin)

        mem = mems[mem_sel]
        if idx < query_batch:
            # Fill phase: store unconditionally and track the running maximum.
            mem.append((idx, margin))
            if maxs[mem_sel] is None or margin > maxs[mem_sel]:
                maxs[mem_sel] = margin
        elif maxs[mem_sel] is not None and margin < maxs[mem_sel]:
            # Replace phase: evict the largest entry, append the new one
            # (remove + append keeps the storage order), refresh the maximum.
            mem.remove(max(mem, key=lambda entry: entry[1]))
            mem.append((idx, margin))
            maxs[mem_sel] = max(mem, key=lambda entry: entry[1])[1]

        # Round-robin memory selection
        mem_sel += 1
        if mem_sel == n_memories:
            mem_sel = 0

    query_idx = np.array(
        [stored_idx for mem in mems for stored_idx, _ in mem], dtype=int
    )

    margins_batch = np.array(margins)[query_idx]

    return margins_batch, query_idx

def flattener(din, word_width):
    '''
    this function flattens the input data to be written to BRAM

    din        -> iterable of rows; every value is formatted as a 4-digit
                  hex field (values are expected to fit in 16 bits)
    word_width -> width in bits of one memory word

    Each row is zero-padded to word_width//16 fields, then consecutive field
    pairs are merged into 8-digit hex strings with the second field of the
    pair in the upper half ("f1" + "f0"). Fields beyond word_width//16, or a
    trailing unpaired field, are dropped.

    Returns a 1-D numpy array of hex strings, word_width//32 per row.
    '''
    n_fields = word_width // 16
    n_pairs = n_fields // 2
    zero_field = format(0, '04x')

    dout_flattened = []
    for word in din:
        fields = [format(value, '04x') for value in word]
        # Pad with a plain list; a negative count simply adds nothing.
        fields.extend([zero_field] * (n_fields - len(fields)))
        dout_flattened.extend(
            fields[2 * pair + 1] + fields[2 * pair] for pair in range(n_pairs)
        )

    return np.array(dout_flattened)

In [3]:
# Hand-written test-bench stimulus: 160 rows of 10 values each, stored as
# uint16. The trailing "# i=..." comments give the row index of each entry.
# The margin of a row is (largest value - second largest value); several rows
# are built to have very small margins (e.g. i=0/1 -> 0, i=4/8/9/14 -> 1) and
# the two repeated filler rows have large margins (100 and 320).
tb_data = []
tb_data.append([200, 200, 140, 130, 120, 110, 100, 90, 80, 70])               # i=0
tb_data.append([200, 200, 140, 130, 120, 110, 100, 90, 80, 70])               # i=1
tb_data.append([210, 158, 150, 140, 130, 120, 110, 100, 90, 80])              # i=2
tb_data.append([220, 167, 160, 150, 140, 130, 120, 110, 100, 90])             # i=3
tb_data.append([230, 176, 170, 160, 150, 140, 130, 120, 310, 309])            # i=4
tb_data.append([240, 185, 180, 170, 160, 150, 140, 130, 120, 110])            # i=5
tb_data.append([250, 194, 190, 180, 170, 160, 150, 140, 130, 120])            # i=6
tb_data.append([260, 203, 200, 190, 180, 170, 160, 150, 140, 130])            # i=7
tb_data.append([270, 212, 210, 200, 1900, 1899, 170, 160, 150, 140])            # i=8
tb_data.append([280, 221, 220, 210, 200, 190, 180, 170, 360, 359])            # i=9
tb_data.append([290, 230, 220, 210, 200, 190, 180, 170, 160, 150])            # i=10
tb_data.append([300, 239, 230, 220, 210, 200, 190, 180, 170, 160])            # i=11
tb_data.append([310, 248, 240, 230, 220, 210, 200, 190, 180, 170])            # i=12
tb_data.append([320, 257, 250, 240, 230, 220, 210, 200, 190, 180])            # i=13
tb_data.append([335, 334, 260, 250, 240, 230, 220, 210, 200, 190])            # i=14
tb_data.append([340, 275, 270, 260, 250, 240, 230, 220, 210, 200])            # i=15
tb_data.append([350, 284, 280, 400, 402, 270, 240, 230, 220, 210])            # i=16
# 100 identical filler rows (margin 280 - 180 = 100)
for _ in range(100):
    tb_data.append([280, 180, 168, 167, 163, 165, 161, 166, 164, 162])        # i=17 to i=116
tb_data.append([100,  99,  95,  90,  85,  80,  75,  70,  65,  60])            # i=117
tb_data.append([200, 198, 180, 170, 160, 150, 140, 130, 120, 110])            # i=118
tb_data.append([300, 297, 245, 240, 235, 230, 225, 220, 215, 210])            # i=119
tb_data.append([400, 396, 380, 370, 360, 350, 340, 330, 320, 310])            # i=120
tb_data.append([500, 495, 480, 470, 460, 450, 440, 430, 420, 410])            # i=121
tb_data.append([600, 594, 580, 570, 560, 550, 540, 530, 520, 510])            # i=122
tb_data.append([700, 693, 680, 670, 660, 650, 640, 630, 620, 610])            # i=123
tb_data.append([800, 792, 780, 770, 760, 750, 740, 730, 720, 710])            # i=124
tb_data.append([900, 891, 880, 870, 860, 850, 840, 830, 820, 810])            # i=125
tb_data.append([1000, 990, 980, 970, 960, 950, 940, 930, 920, 910])           # i=126
tb_data.append([1100, 1089, 1080, 1070, 1060, 1050, 1040, 1030, 1020, 1010])  # i=127
tb_data.append([1200, 1188, 1180, 1170, 1160, 1150, 1140, 1130, 1120, 1110])  # i=128
tb_data.append([1300, 1287, 1280, 1270, 1260, 1250, 1240, 1230, 1220, 1210])  # i=129
tb_data.append([1400, 1386, 1380, 1370, 1360, 1350, 1340, 1330, 1320, 1310])  # i=130
tb_data.append([1500, 1485, 1480, 1470, 1460, 1450, 1440, 1430, 1420, 1410])  # i=131
tb_data.append([1600, 1584, 1580, 1570, 1560, 1550, 1540, 1530, 1520, 1510])  # i=132
tb_data.append([1700, 1683, 1680, 1670, 1660, 1650, 1640, 1630, 1620, 1610])  # i=133
tb_data.append([1800, 1782, 1780, 1770, 1760, 1750, 1740, 1730, 1720, 1710])  # i=134
tb_data.append([1900, 1881, 1880, 1870, 1860, 1850, 1840, 1830, 1820, 1810])  # i=135
tb_data.append([2000, 1980, 1975, 1970, 1960, 1950, 1940, 1930, 1920, 1910])  # i=136
tb_data.append([2100, 2079, 2075, 2070, 2060, 2050, 2040, 2030, 2020, 2010])  # i=137
# 20 identical filler rows (margin 640 - 320 = 320)
for _ in range(20):
    tb_data.append([320, 640, 16, 16, 18, 5, 16, 16, 16, 16])                 # i=138 to i=157
tb_data.append([218, 219, 1, 2, 161, 5, 9, 16, 16, 16])                       # i=158
tb_data.append([218, 219, 1, 2, 400, 5, 9, 16, 16, 16])                       # i=159

# Convert to a (160, 10) uint16 array; every value above fits in 16 bits.
tb_data = np.array(tb_data, dtype=np.uint16)

print("Shape of tb_data_array:", tb_data.shape)

Shape of tb_data_array: (160, 10)


In [5]:
# Pack every test-bench row into one 256-bit word, i.e. eight 32-bit hex
# strings per row (the 10 values are zero-padded to 16 fields).
input_data = flattener(tb_data, 256)
# 160 rows * 8 strings per row -> 1280 entries expected
input_data.shape

(1280,)

In [7]:
# Write array to BRAM 0
# One 32-bit word per entry; byte offsets advance in steps of 4.
for word_idx, hex_word in enumerate(input_data):
    bram0_obj.write(4 * word_idx, int(hex_word, 16))

In [8]:
# Check BRAM 0
# Read back every 32-bit word that was written and print "<byte offset> <hex value>".
n_bytes_written = 4 * input_data.shape[0]
for byte_offset in range(0, n_bytes_written, 4):
    readback = bram0_obj.read(byte_offset)
    print(byte_offset, "0x%08X" % readback)

0 0x00C800C8
4 0x0082008C
8 0x006E0078
12 0x005A0064
16 0x00460050
20 0x00000000
24 0x00000000
28 0x00000000
32 0x00C800C8
36 0x0082008C
40 0x006E0078
44 0x005A0064
48 0x00460050
52 0x00000000
56 0x00000000
60 0x00000000
64 0x009E00D2
68 0x008C0096
72 0x00780082
76 0x0064006E
80 0x0050005A
84 0x00000000
88 0x00000000
92 0x00000000
96 0x00A700DC
100 0x009600A0
104 0x0082008C
108 0x006E0078
112 0x005A0064
116 0x00000000
120 0x00000000
124 0x00000000
128 0x00B000E6
132 0x00A000AA
136 0x008C0096
140 0x00780082
144 0x01350136
148 0x00000000
152 0x00000000
156 0x00000000
160 0x00B900F0
164 0x00AA00B4
168 0x009600A0
172 0x0082008C
176 0x006E0078
180 0x00000000
184 0x00000000
188 0x00000000
192 0x00C200FA
196 0x00B400BE
200 0x00A000AA
204 0x008C0096
208 0x00780082
212 0x00000000
216 0x00000000
220 0x00000000
224 0x00CB0104
228 0x00BE00C8
232 0x00AA00B4
236 0x009600A0
240 0x0082008C
244 0x00000000
248 0x00000000
252 0x00000000
256 0x00D4010E
260 0x00C800D2
264 0x076B076C
268 0x00A000AA
272 0x00

In [9]:
# Pulse GPIO register 0x0 (write 1, then 0), then poll GPIO register 0x8
# until it reads non-zero.
# NOTE(review): presumably 0x0 is the core's start input and 0x8 its done
# flag (AXI GPIO channel 1 / channel 2 data registers) - confirm against the
# block design.
DONE_TIMEOUT_S = 5.0  # generous upper bound; adjust if the core legitimately needs longer

gpio0_obj.write(0, 1)
gpio0_obj.write(0, 0)

# Bounded wait: without a deadline a design that never raises its flag
# would hang the kernel forever.
deadline = time.monotonic() + DONE_TIMEOUT_S
while gpio0_obj.read(8) == 0:
    if time.monotonic() > deadline:
        raise TimeoutError(
            "GPIO register 0x8 still reads 0 after %.1f s" % DONE_TIMEOUT_S
        )

In [10]:
# Output array
# Zero-initialised buffer of 16 unsigned 32-bit words, filled from BRAM 1 below.
output_data = np.zeros(16, dtype="uint32")

In [11]:
# Read BRAM 1 to array
# One 32-bit word per output slot; byte offsets advance in steps of 4.
for word_idx in range(output_data.shape[0]):
    output_data[word_idx] = bram1_obj.read(4 * word_idx)

In [12]:
# Software reference: element [3] of the result is query_idx_batch, the
# indices of the 16 samples with the smallest margins over the whole data set.
margin_sw_indx = margin_sw(tb_data, 16)[3]
margin_sw_indx

array([  0,   1,  14,   9,   8, 117, 158,   4, 118,  16, 119, 120, 121,
       122, 123, 124], dtype=int32)

In [13]:
# Software model of the multi-memory selection: 16 entries spread over
# 4 memories; element [1] of the result is the array of stored sample indices.
margin_hw_indx = margin_hw(tb_data, 16, 4)[1]
margin_hw_indx

array([  0,   4,   8,  16,   1,   9, 117, 121,  14, 118, 122, 158, 119,
       123, 127, 131])

In [14]:
# Indices read back from BRAM 1, to compare against margin_hw_indx
output_data

array([  0,   4,   8,  16,   1, 121,   9, 117, 158, 122, 118,  14, 131,
       127, 123, 119], dtype=uint32)

In [15]:
# Print every index selected by the software model (margin_hw) that is absent
# from the data read back from BRAM 1. Membership is compared rather than
# position because the two results list the indices in a different order.
# No output means every modelled index was found.
# A named loop variable is used because `_` is IPython's last-output variable.
for model_idx in margin_hw_indx:
    if model_idx not in output_data:
        print(model_idx)

In [16]:
# Drop the test-bench arrays from the kernel namespace; the cells below
# create their own data.
del input_data, output_data, tb_data

### Some tests
- The input memory depth needs to be changed.
- Create a function that writes 256-bit words, like the following code does.

In [155]:
#write 256-bit word
# A 256-bit word occupies eight consecutive 32-bit locations (byte offsets 0..28).
WORDS_PER_256BIT = 8

test_input_data = np.zeros((16,), dtype=np.uint32)
test_input_data[:WORDS_PER_256BIT] = [
    0x0001000a,
    0x0002000b,
    0x0003000c,
    0x0004000d,
    0x0005000a,
    0x0006000b,
    0x0007000c,
    0x0008000d,
]

# Write the eight 32-bit lanes to BRAM 0
for lane in range(WORDS_PER_256BIT):
    bram0_obj.write(4 * lane, int(test_input_data[lane]))

# Read each lane back once, print it, and assemble the 256-bit value with
# lane 0 in the least-significant 32 bits.
result = 0
for lane in range(WORDS_PER_256BIT):
    lane_word = int(bram0_obj.read(4 * lane))
    print("0x%08X" % lane_word)
    result |= lane_word << (32 * lane)

print("0x%064X" % result)

0x0001000A
0x0002000B
0x0003000C
0x0004000D
0x0005000A
0x0006000B
0x0007000C
0x0008000D
0x0008000D0007000C0006000B0005000A0004000D0003000C0002000B0001000A


In [10]:
#write 256-bit word
# A 256-bit word occupies eight consecutive 32-bit locations (byte offsets 0..28).
WORDS_PER_256BIT = 8

input_data = np.zeros((16,), dtype=np.uint32)
input_data[:WORDS_PER_256BIT] = [
    0x0001000a,
    0x0002000b,
    0x0003000c,
    0x0004000d,
    0x0005000a,
    0x0006000b,
    0x0007000c,
    0x000f000d,
]

# Write the eight 32-bit lanes to BRAM 0
for lane in range(WORDS_PER_256BIT):
    bram0_obj.write(4 * lane, int(input_data[lane]))

# Read each lane back once, print it, and assemble the 256-bit value with
# lane 0 in the least-significant 32 bits.
result = 0
for lane in range(WORDS_PER_256BIT):
    lane_word = int(bram0_obj.read(4 * lane))
    print("0x%08X" % lane_word)
    result |= lane_word << (32 * lane)

print("0x%064X" % result)

0x0001000A
0x0002000B
0x0003000C
0x0004000D
0x0005000A
0x0006000B
0x0007000C
0x000F000D
0x000F000D0007000C0006000B0005000A0004000D0003000C0002000B0001000A
