In [1]:
from __future__ import print_function
import os
import sys
import time
import numpy as np
import re
import faiss
from multiprocessing.dummy import Pool as ThreadPool
from matplotlib import pyplot

In [3]:
def mmap_fvecs(fname):
    x = np.memmap(fname, dtype='int32', mode='r')
    d = x[0]
    return x.view('float32').reshape(-1, d + 1)[:, 1:]


def mmap_bvecs(fname):
    x = np.memmap(fname, dtype='uint8', mode='r')
    d = x[:4].view('int32')[0]
    return x.reshape(-1, d + 4)[:, 4:]

def ivecs_read(fname):
    a = np.fromfile(fname, dtype='int32')
    d = a[0]
    return a.reshape(-1, d + 1)[:, 1:].copy()

cluster_num   = 8192
PQ_bytes      = 16

assert PQ_bytes == 16 # (only supports PQ 16 in this script)

dbname        = 'SIFT100M'
index_key     = 'IVF{},PQ{}'.format(cluster_num, PQ_bytes)
parametersets = ['nprobe=32']
topK          = 10

tmpdir = '../trained_CPU_indexes/bench_cpu_{}_{}'.format(dbname, index_key)

if not os.path.isdir(tmpdir):
    raise("%s does not exist")


#################################################################
# Prepare dataset
#################################################################


print("Preparing dataset", dbname)

if dbname.startswith('SIFT'):
    # SIFT1M to SIFT1000M
    dbsize = int(dbname[4:-1])
    xb = mmap_bvecs('../bigann/bigann_base.bvecs')
    xq = mmap_bvecs('../bigann/bigann_query.bvecs')
    xt = mmap_bvecs('../bigann/bigann_learn.bvecs')

    # trim xb to correct size
    xb = xb[:dbsize * 1000 * 1000]

    gt = ivecs_read('../bigann/gnd/idx_%dM.ivecs' % dbsize)

elif dbname == 'Deep1B':
    xb = mmap_fvecs('../deep1b/base.fvecs')
    xq = mmap_fvecs('../deep1b/deep1B_queries.fvecs')
    xt = mmap_fvecs('../deep1b/learn.fvecs')
    # deep1B's train is is outrageously big
    xt = xt[:10 * 1000 * 1000]
    gt = ivecs_read('../deep1b/deep1B_groundtruth.ivecs')

else:
    print('unknown dataset', dbname, file=sys.stderr)
    sys.exit(1)


print("sizes: B %s Q %s T %s gt %s" % (
    xb.shape, xq.shape, xt.shape, gt.shape))

nq, d = xq.shape
nb, d = xb.shape
assert gt.shape[0] == nq


#################################################################
# Load Index
#################################################################

def get_populated_index():

    filename = "%s/%s_%s_populated.index" % (
        tmpdir, dbname, index_key)

    if not os.path.exists(filename):
        raise("Index does not exist!")
    else:
        print("loading", filename)
        index = faiss.read_index(filename)
    return index


Preparing dataset SIFT100M
sizes: B (100000000, 128) Q (10000, 128) T (100000000, 128) gt (10000, 1000)


In [11]:
#################################################################
# Perform searches
#################################################################

index = get_populated_index()

ps = faiss.ParameterSpace()
ps.initialize(index)

# make sure queries are in RAM
xq = xq.astype('float32').copy()

# a static C++ object that collects statistics about searches
ivfpq_stats = faiss.cvar.indexIVFPQ_stats
ivf_stats = faiss.cvar.indexIVF_stats


# we do queries in a single thread
faiss.omp_set_num_threads(1)

print(' ' * len(parametersets[0]), '\t', 'R@10   time    %pass')
# print(' ' * len(parametersets[0]), '\t', 'R@1    R@10   R@100     time    %pass')

for param in parametersets:
    print(param, '\t', end=' ')
    sys.stdout.flush()
    ps.set_index_parameters(index, param)
    t0 = time.time()
    ivfpq_stats.reset()
    ivf_stats.reset()
    D, I = index.search(xq, topK)
    t1 = time.time()
    for rank in [topK]:
        n_ok = (I[:, :rank] == gt[:, :1]).sum()
        print("%.4f" % (n_ok / float(nq)), end=' ')
    print("%8.3f  " % ((t1 - t0) * 1000.0 / nq), end=' ')
    print("%5.2f" % (ivfpq_stats.n_hamming_pass * 100.0 / ivf_stats.ndis))

loading ../trained_CPU_indexes/bench_cpu_SIFT100M_IVF8192,PQ16/SIFT100M_IVF8192,PQ16_populated.index
          	 R@10   time    %pass
nprobe=32 	 0.8132    4.256    0.00


In [17]:
print("==== Distance ====\n\n{}\nshape: {}\n".format(D, D.shape))
print("==== Index ====\n\n{}\nshape: {}\n".format(I, I.shape))

==== Distance ====

[[63253.44  63354.066 64101.27  ... 65175.656 65276.586 65290.062]
 [30681.17  32937.59  34951.28  ... 36604.473 36795.836 36928.688]
 [38950.06  39540.977 39613.273 ... 43175.367 43385.14  43448.914]
 ...
 [51819.89  52301.04  52971.11  ... 56143.484 56264.5   57138.074]
 [47280.098 51085.88  51243.496 ... 54104.6   54629.902 54771.11 ]
 [44446.645 47224.453 48883.477 ... 50725.137 51028.46  51033.734]]
shape: (10000, 10)

==== Index ====

[[40642738 28539420 40156538 ... 12453834 79311056 96419792]
 [33176630 23668085 31792502 ...  4369541 42617250 44060661]
 [17844338 23353312 95657237 ... 23365439 27633542 67818653]
 ...
 [ 9286338 46354463 46817758 ... 29105763 55073159 50719824]
 [74291030  3887666  8126413 ... 98062712 67987096 71671668]
 [ 7703493 53564217  7681219 ...  7204070 46610790 30587893]]
shape: (10000, 10)



In [18]:
print("Query vector example\nxq\ttype: {}\tshape: {}\n".format(type(xq), xq.shape))
print(xq[0], xq[0].shape) 
# Why like integers? -> The sift1b dataset actually stores vector in uint8 format, to save space
#   size = 132000000000 bytes = (128 bytes vector + 4 bytes index) * 1B
print(xq[0][0])

Query vector example
xq	type: <class 'numpy.ndarray'>	shape: (10000, 128)

[  3.   9.  17.  78.  83.  15.  10.   8. 101. 109.  21.   8.   3.   2.
   9.  64.  39.  31.  18.  80.  55.  10.   2.  12.   7.   7.  26.  58.
  32.   6.   4.   3.  14.   2.  13.  28.  37.  19.  47.  59. 109.  22.
   2.   6.  18.  15.  20. 109.  30.   8.  11.  44. 109.  54.  19.  32.
  17.  21.  15.  22.  12.  28. 101.  35.  66.  11.   9.  30.  68.  35.
  30.  75. 106. 103.  26.  50.  76.  20.   8.  13.  51.  41.  63. 109.
  40.   2.   3.  15.  36.  49.  21.  13.  12.   9.  36.  37.  52.  37.
  24.  34.  19.   3.  13.  23.  21.   8.   3.  20.  68.  56.  79.  60.
  99.  36.   7.  28.  78.  41.   7.  21.  74.  26.   3.  15.  34.  15.
  12.  27.] (128,)
3.0


In [211]:
# D -> result distances 
# I -> result vector indexes

folder_name = 'FPGA_data_{}_{}'.format(dbname, index_key)
if not os.path.exists('./saved_npy_data/' + folder_name):
    os.mkdir('./saved_npy_data/' + folder_name)
    
D = np.array(D, dtype=np.float32)
I = np.array(I, dtype=np.int32)
xq = np.array(xq, dtype=np.float32)
print(D.shape, I.shape, xq.shape)
D.tofile("./saved_npy_data/{}/result_distance_float32_{}_{}_raw".format(folder_name, D.shape[0], D.shape[1]))
I.tofile("./saved_npy_data/{}/result_index_int32_{}_{}_raw".format(folder_name, I.shape[0], I.shape[1]))
xq.tofile("./saved_npy_data/{}/query_vectors_float32_{}_{}_raw".format(folder_name, xq.shape[0], xq.shape[1]))

(10000, 10) (10000, 10) (10000, 128)


In [15]:
"""
My Search Functions
"""

coarse_quantizer = faiss.downcast_index(index.quantizer)

def get_sub_quantizer_centroids(index):
    """
    return the sub-quantizer centroids, 
    shape = (m, 256, d / m)
    e.g., d=128, m=16 -> (16, 256, 8)
    """
    pq = index.pq
    cen = faiss.vector_to_array(pq.centroids)
    cen = cen.reshape(pq.M, pq.ksub, pq.dsub)
    
    return cen

def get_coarse_quantizer_centroids(coarse_quantizer):
    """
    return the coarse-grained quantizer centroids,
    shape = (nlist, d),
    e.g., nlist=1024, d=128 -> (1024, 128)
    """
    coarse_cen = faiss.vector_to_array(coarse_quantizer.xb)
#     print("coarse_cen.shape", coarse_cen.shape) # 1024 * 64 = 65536
#     print("coarse_quantizer.ntotal", coarse_quantizer.ntotal)
#     print("coarse_quantizer.d", coarse_quantizer.d)

    coarse_cen = coarse_cen.reshape(coarse_quantizer.ntotal, coarse_quantizer.d)
    return coarse_cen

In [16]:
# Get Sub quantizer info
sub_cen = get_sub_quantizer_centroids(index)
print("==== Sub-quantizer ====\n{}\n\nshape:{}\n".format(sub_cen, sub_cen.shape))

# Get Coarse quantizer info
coarse_cen = get_coarse_quantizer_centroids(coarse_quantizer)
print("==== Coarse-quantizer ====\n{}\n\nshape:{}\n".format(coarse_cen, coarse_cen.shape))

==== Sub-quantizer ====
[[[-1.39830580e+01 -9.50784492e+00 -3.96348667e+00 ...  5.86917877e+00
    1.04514023e+02  1.04102793e+01]
  [-6.90511627e+01 -2.03367958e+01  2.62607670e+00 ...  2.83390975e+00
    1.95819175e+00 -8.36786938e+00]
  [-4.25987320e+01 -9.93877316e+00  1.04937162e+01 ... -1.98086727e+00
   -6.30460835e+00 -2.12152977e+01]
  ...
  [ 1.92724438e+01  2.98870945e+00 -6.91194391e+00 ...  1.94331989e+01
   -9.75370109e-01 -8.94906640e-01]
  [ 2.05092468e+01 -1.09992886e+00 -9.42241001e+00 ...  9.38970661e+00
   -7.21518993e+00 -4.89512920e+00]
  [ 1.51460276e+01  1.51528549e+00 -1.98966455e+00 ... -1.96080756e+00
   -1.24802208e+00  4.54244643e-01]]

 [[-1.38893576e+01 -1.30567741e+01  1.27895641e+01 ... -1.25062037e+00
    4.34972916e+01  7.75580406e+00]
  [ 1.04570885e+01  3.74349747e+01  1.84257901e+00 ... -4.78825521e+00
    1.15119820e+01  5.44087677e+01]
  [-1.12493572e+01 -3.95343399e+00  1.05285339e+01 ... -1.15094595e+01
   -1.70058894e+00 -4.81053877e+00]
  ...

In [212]:
# Save the coarse quantizer and the product quantizer

PQ_quantizer = np.array(sub_cen, dtype=np.float32)
coarse_cen = np.array(coarse_cen, dtype=np.float32)

# 16, 256, 8 -> (0,0,0:8) the first row of the subquantizer of the first sub-vector 
print(PQ_quantizer.shape, coarse_cen.shape)


PQ_quantizer.tofile("./saved_npy_data/{}/product_quantizer_float32_{}_{}_{}_raw".format(
    folder_name, PQ_quantizer.shape[0], PQ_quantizer.shape[1], PQ_quantizer.shape[2]))
coarse_cen.tofile("./saved_npy_data/{}/vector_quantizer_float32_{}_{}_raw".format(
    folder_name, coarse_cen.shape[0], coarse_cen.shape[1]))

(16, 256, 8) (8192, 128)


In [179]:
invlists = index.invlists

def get_invlist(invlists, cluster_id):
    """ 
    returns the (vector IDs set, PQ cose set) of list ID "l"
    list_ids: (#vec_in_list, ), e.g., #vec_in_list=10 -> (10, )
    list_codes: (#vec_in_list, m), e.g., #vec_in_list=10, m=16 -> (10, 16)
    
    That the data is *NOT* copied: if the inverted index is deallocated or changes, accessing the array may crash.
    To avoid this, just clone the output arrays on output. 
    """
    ls = invlists.list_size(cluster_id)
    list_vec_ids = faiss.rev_swig_ptr(invlists.get_ids(cluster_id), ls)
    list_PQ_codes = faiss.rev_swig_ptr(invlists.get_codes(cluster_id), ls * invlists.code_size)
    list_PQ_codes = list_PQ_codes.reshape(-1, invlists.code_size)
    
    return list_vec_ids, list_PQ_codes

In [323]:
# Example of using function "get_invlist"
list_id = 1
list_vec_ids, list_PQ_codes = get_invlist(invlists, list_id)
print("==== Vector IDs ====\n{}\n\nshape: {}\n".format(list_vec_ids, list_vec_ids.shape))
print("==== PQ codes ====\n{}\n\nshape: {}\ndtype:{}\n".format(list_PQ_codes, list_PQ_codes.shape, list_PQ_codes.dtype))

==== Vector IDs ====
[    2640     7939    14687 ... 99975504 99983424 99987628]

shape: (12973,)

==== PQ codes ====
[[230  95 207 ... 206   0 235]
 [131 231  68 ... 232 172  55]
 [ 78 161  35 ...  26 166 197]
 ...
 [217 235  57 ... 190 134  89]
 [ 45  82  57 ... 112  86  87]
 [102 220 191 ... 101   5 143]]

shape: (12973, 16)
dtype:uint8



In [287]:
# Play with the python byte objects

a = int(list_ids[0])
partial_list_ids_bytes = a.to_bytes(length=4, byteorder='big', signed=True)

print("vec_ID: {}, bytes: {}".format(a, partial_list_ids_bytes))
partial_list_codes_bytes = bytes(list_codes[0])
print("list_codes: {}, bytes: {}".format(list_codes[0], partial_list_codes_bytes))

partial_list_id_and_code = partial_list_ids_bytes + partial_list_codes_bytes# concate by "+"
print(type(partial_list_ids_bytes), type(partial_list_codes_bytes))
ls = bytearray(partial_list_id_and_code)
print(ls, len(ls))

vec_ID: 7330, bytes: b'\x00\x00\x1c\xa2'
list_codes: [232 237  49  47 170 123 115 238 183 185  22 157 203  65 119  71], bytes: b'\xe8\xed1/\xaa{s\xee\xb7\xb9\x16\x9d\xcbAwG'
<class 'bytes'> <class 'bytes'>
bytearray(b'\x00\x00\x1c\xa2\xe8\xed1/\xaa{s\xee\xb7\xb9\x16\x9d\xcbAwG') 20


In [324]:
def get_contents_to_HBM(invlists, cluster_id, HBM_bank_num=int(21)):
    """
    For a single cluster (list), extract the contents in the format that HBM loads
      inputs:
        invlists: the Faiss index.invlists object
        cluster_id: e.g., 0~8191 for nlist=8192
        HBM_bank_num: 21 for default, athough there are 32 banks on U280, 
                    we don't have enough hardware logic to load and compute at that rate
      outputs:
        HBM_bank_contents( content of 21 banks): a list of 21 element
            each element is a byte object with a set of contents
            the size of the content is m * 64 bytes
            the contents includes (3 * (int32 vector ID) (16 byte PQ code)) + 4byte padding
        entries_per_bank: int, all HBM shares the same number of 512-bit items to scan
        last_valid_element: int from 0 to 62 (63 numbers in total given 21 HBM channels)
            some of the elements in the last row are paddings, which of them is the last non-padding (valid) 
            
      term:
        entry: a 512-bit entry containing 3 PQ codes
        vector: a 20-byte vector containing 4 byte vector ID + 16 byte PQ code
    """
    
    list_vec_ids, list_PQ_codes = get_invlist(invlists, cluster_id)
#     print("list_vec_ids", list_vec_ids.shape)
#     print("list_PQ_codes", list_PQ_codes.shape)
    num_vec = list_vec_ids.shape[0]
    assert list_vec_ids.shape[0] == list_PQ_codes.shape[0]
    
#     print("num_vec", num_vec)
    
    if num_vec % (HBM_bank_num * 3) == 0:
        # no padding
        entries_per_bank = num_vec / (HBM_bank_num * 3)
        last_valid_element = HBM_bank_num * 3 - 1
        num_vec_per_HBM = [int(num_vec / HBM_bank_num)] * HBM_bank_num
        num_pad_per_HBM = [0] * HBM_bank_num
    else:
        # with padding
        entries_per_bank = int(num_vec / (HBM_bank_num * 3)) + 1
        last_valid_element = num_vec % (HBM_bank_num * 3) - 1
        num_vec_per_HBM = []
        num_pad_per_HBM = []
        
        counted_banks = 0
        # bank with full valid elements
        for i in range(int((last_valid_element + 1) / 3)):
            num_vec_per_HBM += [entries_per_bank * 3]
            num_pad_per_HBM += [0]
        counted_banks += int((last_valid_element + 1) / 3)
        
        # (optional) bank with some valid elements and some padding in the last entry
        if (last_valid_element + 1) % 3 != 0:
            num_vec_per_HBM += [(entries_per_bank - 1) * 3 + (last_valid_element + 1) % 3]
            num_pad_per_HBM += [3 - (last_valid_element + 1) % 3]
            counted_banks += 1
        
        # (optional) bank with full padding in the last entry
        for i in range(HBM_bank_num - counted_banks):
            num_vec_per_HBM += [int((entries_per_bank - 1) * 3)]
            num_pad_per_HBM += [3]
            
    assert np.sum(np.array(num_vec_per_HBM)) == num_vec
    assert entries_per_bank * HBM_bank_num * 3 - np.sum(np.array(num_pad_per_HBM)) == num_vec
    
    HBM_bank_contents = []
    
    start = int(0)
    
    zero = int(0)
    empty_byte = zero.to_bytes(1, "little", signed=True)
    
#     print("num_vec_per_HBM:", num_vec_per_HBM)
#     print("num_pad_per_HBM:", num_pad_per_HBM)
    
    for i in range(HBM_bank_num):
        
        # add valid vectors first
        end = start + num_vec_per_HBM[i]
        vec_per_bank_count = 0
        byte_obj = bytes()
        
#         print(start, end)
        
        for vec_id_per_bank in range(start, end):
            
            # Vec ID = signed int
            vec_id = int(list_vec_ids[vec_id_per_bank])
            # Xilinx's ap int use little endian
            # Linux on X86 use little endian
            # https://serverfault.com/questions/163487/how-to-tell-if-a-linux-system-is-big-endian-or-little-endian
            byte_obj += vec_id.to_bytes(4, "little", signed=True)
            
            # PQ code = unsigned char
            PQ_codes = list_PQ_codes[vec_id_per_bank]
            for code in PQ_codes:
                code = int(code)
                # Xilinx's ap int use little endian
                byte_obj += code.to_bytes(1, "little", signed=False)
            
            vec_per_bank_count += 1
            if vec_per_bank_count % 3 == 0:
                byte_obj += empty_byte * 4
        
        start = end
        
        # then add paddings
        if num_pad_per_HBM[i] > 0:
            for pad_id in range(num_pad_per_HBM[i]):
                byte_obj += empty_byte * 20
            byte_obj += empty_byte * 4
        
        HBM_bank_contents += [byte_obj]
       
    for i in range(HBM_bank_num):
        assert len(HBM_bank_contents[i]) == len(HBM_bank_contents[0])
        assert len(HBM_bank_contents[i]) == 64 * entries_per_bank
    
    return HBM_bank_contents, entries_per_bank, last_valid_element

In [325]:
# Get HBM contents from all clusters

list_HBM_bank_contents = [] # array of cluster_num * HBM_bank_num elements
list_entries_per_bank = []
list_last_valid_element = []

HBM_bank_num = int(21) 

for c in range(cluster_num):
    HBM_bank_contents, entries_per_bank, last_valid_element = get_contents_to_HBM(invlists, c, HBM_bank_num)
    list_HBM_bank_contents += HBM_bank_contents
    list_entries_per_bank += [entries_per_bank]
    list_last_valid_element += [last_valid_element]

In [326]:
# Reorder list_HBM_bank_contents

print(len(list_HBM_bank_contents))
print("list_entries_per_bank:\n", list_entries_per_bank)
print("list_last_valid_element:\n", list_last_valid_element)

list_HBM_bank_contents_reordered = [] # put all contents of the same HBM bank together

for b in range(HBM_bank_num):
    sub_list = []
    for c in range(cluster_num):
        sub_list += [list_HBM_bank_contents[c * HBM_bank_num + b]]
    print(len(sub_list), len(sub_list[0]))
    list_HBM_bank_contents_reordered += [sub_list]
    
print("list_HBM_bank_contents_reordered:", len(list_HBM_bank_contents_reordered), len(list_HBM_bank_contents_reordered[0]))

172032
list_entries_per_bank:
 [151, 206, 141, 242, 245, 102, 211, 171.0, 251, 186, 214, 43, 153, 182, 203, 157, 215, 164, 203, 156, 161, 189, 90, 135, 162, 314, 239, 136, 181, 148, 188, 157, 170, 144, 114, 182, 132, 144, 245, 164, 133, 167, 665, 181, 245, 170, 54, 170, 189, 169, 205, 167, 180, 156, 223, 344, 353, 192, 226, 119, 188, 174, 173, 208, 135, 118, 218, 356, 187, 175, 180, 139.0, 295, 183, 162, 151, 164, 177, 225, 187, 162, 204, 278, 170, 136, 136, 153, 168, 194, 199, 510, 131, 488, 199, 327, 238, 176, 148, 116, 220, 215, 199, 197, 175, 195, 390, 182, 229, 151, 164, 206, 210, 131, 195, 143, 89, 160, 505, 285, 159, 148, 205, 129, 362, 130, 175, 233, 203, 194, 150, 178, 175, 162, 173, 145, 164, 316, 181, 197, 265, 100, 153, 186, 173, 169, 276, 293, 170, 135, 147, 125, 188, 181, 136, 189, 163, 128, 181, 198, 159, 159, 181, 211, 29, 228, 113, 142, 199, 158, 264, 175, 245, 189, 220, 149, 175, 136, 148, 165, 154, 163, 201, 180, 199, 183, 179.0, 116, 139, 212, 99, 234, 191, 158, 159

In [327]:
# Concatenate 

HBM_bank_contents_all = [bytes()] * HBM_bank_num # contents of each bank
for b in range(HBM_bank_num):
    HBM_bank_contents_all[b] = HBM_bank_contents_all[b].join(list_HBM_bank_contents_reordered[b])
    
total_size = np.sum(np.array([len(h) for h in HBM_bank_contents_all]))
print("HBM_bank_contents_all: shape: {}\tsize: {}".format(len(HBM_bank_contents_all), total_size))

HBM_bank_contents_all: shape: 21	size: 2138680320


In [328]:
# Save HBM contents 

for b in range(HBM_bank_num):
    assert len(HBM_bank_contents_all[b]) == len(HBM_bank_contents_all[0])

for b in range(HBM_bank_num):
    with open ('./saved_npy_data/{}/HBM_bank_{}_raw'.format(folder_name, b), 'wb') as f:
        f.write(HBM_bank_contents_all[b])

In [295]:
# Save control contents

#  The format of storing HBM_info_start_addr_and_scanned_entries_every_cell_and_last_element_valid: 
#     8192 start_addr, then 8192 scanned_entries_every_cell, then 8192 last_valid_element
#     int start_addr_LUT[nlist];
#     int scanned_entries_every_cell_LUT[nlist];
#     int last_valid_channel_LUT[nlist];  

list_start_addr_every_cell = [0]
for c in range(cluster_num - 1):
    list_start_addr_every_cell.append(list_start_addr_every_cell[c] + list_entries_per_bank[c])

assert len(list_start_addr_every_cell) == len(list_entries_per_bank) and\
    len(list_start_addr_every_cell) == len(list_last_valid_element)

print(list_start_addr_every_cell[-1])

HBM_info_start_addr_and_scanned_entries_every_cell_and_last_element_valid = \
    list_start_addr_every_cell + list_entries_per_bank + list_last_valid_element

HBM_info_start_addr_and_scanned_entries_every_cell_and_last_element_valid = np.array(
    HBM_info_start_addr_and_scanned_entries_every_cell_and_last_element_valid, dtype=np.int32)

HBM_info_start_addr_and_scanned_entries_every_cell_and_last_element_valid.tofile(
    "./saved_npy_data/{}/HBM_info_start_addr_and_scanned_entries_every_cell_and_last_element_valid_3_by_{}_raw".format(
        folder_name, cluster_num))

1591149.0


In [332]:
# Verify
print('./saved_npy_data/{}/HBM_bank_0_raw'.format(folder_name))
print(os.path.exists('./saved_npy_data/{}/HBM_bank_0_raw'.format(folder_name)))
loaded_HBM_bytes = bytes()
with open ('./saved_npy_data/{}/HBM_bank_0_raw'.format(folder_name), 'rb') as f:
    loaded_HBM_bytes = f.read()
    print(len(loaded_HBM_bytes))

./saved_npy_data/FPGA_data_SIFT100M_IVF8192,PQ16/HBM_bank_0_raw
True
101841920


In [333]:
loaded_HBM_bytes[101841920 - 5]

251

In [334]:
tmp = int.from_bytes(loaded_HBM_bytes[0:4], "little", signed=True)
print(tmp)

# mask =  0b01111111
# byte_from_file = int(loaded_HBM_bytes[0:4])
# value = mask & byte_from_file
# print(value)

16468


In [336]:
for axi_id in range(2):
    print("axi_id: ", axi_id)
    tmp_axi = loaded_HBM_bytes[axi_id * 64: axi_id * 64 + 64]
    for v in range(3):
        vec_ID = int.from_bytes(tmp_axi[v * 20: v * 20 + 4], "little", signed=True)
        print("vec ID:", vec_ID)
        for i in range(16):
            code = int.from_bytes(tmp_axi[v * 20 + 4 + i: v * 20 + 4 + i + 1], "little", signed=False)
            print(code)

axi_id:  0
vec ID: 16468
60
196
35
96
9
220
26
233
109
94
204
37
246
252
54
62
vec ID: 53119
182
126
51
95
122
167
58
171
69
35
92
130
138
53
148
136
vec ID: 106484
207
2
35
175
222
249
174
101
153
139
200
226
162
9
184
171
axi_id:  1
vec ID: 115432
157
110
49
127
189
185
188
198
0
1
217
165
77
66
174
158
vec ID: 116590
236
231
110
83
217
182
252
44
1
32
220
194
41
19
95
196
vec ID: 116621
150
138
250
23
60
214
104
37
39
35
198
234
136
66
184
12


In [314]:
for i in range(16):
    tmp = int.from_bytes(loaded_HBM_bytes[4 + i: 4 + i + 1], "big", signed=False)
    print(tmp)

60
196
35
96
9
220
26
233
109
94
204
37
246
252
54
62


In [322]:
# Example of using function "get_invlist"
list_id = 0
list_vec_ids, list_PQ_codes = get_invlist(invlists, list_id)
print("==== Vector IDs ====\n{}\n\nshape: {}\n".format(list_vec_ids, list_vec_ids.shape))
print("==== PQ codes ====\n{}\n\nshape: {}\ndtype:{}\n".format(list_PQ_codes, list_PQ_codes.shape, list_PQ_codes.dtype))

==== Vector IDs ====
[   16468    53119   106484 ... 99976161 99977969 99992760]

shape: (9452,)

==== PQ codes ====
[[ 60 196  35 ... 252  54  62]
 [182 126  51 ...  53 148 136]
 [207   2  35 ...   9 184 171]
 ...
 [187 126  35 ... 202 184 229]
 [210  94 135 ... 230 253 172]
 [134 130 205 ... 220 220  30]]

shape: (9452, 16)
dtype:uint8



In [None]:
# little endian is false
for i in range(10):
    tmp = int.from_bytes(loaded_HBM_bytes[0 + i * 64: 4 + i * 64], "little", signed=False)
    print(tmp)

In [242]:
len(HBM_bank_contents_all[0])

101841920

In [282]:
second_axi = loaded_HBM_bytes[64+0:64+64]
print(int.from_bytes(second_axi[0:4], "little", signed=True), 
      int.from_bytes(second_axi[20 + 0: 20 + 4], "little", signed=True), 
      int.from_bytes(second_axi[40 + 0:40 + 4], "little", signed=True))

-389938944 1858535680 -1916337920


In [241]:
# Verify
print("./saved_npy_data/{}/product_quantizer_float32_{}_{}_{}_raw".format(
    folder_name, PQ_quantizer.shape[0], PQ_quantizer.shape[1], PQ_quantizer.shape[2]))
loaded_HBM_bytes = bytes()
with open ("./saved_npy_data/{}/product_quantizer_float32_{}_{}_{}_raw".format(
    folder_name, PQ_quantizer.shape[0], PQ_quantizer.shape[1], PQ_quantizer.shape[2]), 'rb') as f:
    loaded_HBM_bytes = f.read()
    print(len(loaded_HBM_bytes))

./saved_npy_data/FPGA_data_SIFT100M_IVF8192,PQ16/product_quantizer_float32_16_256_8_raw
131072
