In [1]:
import hnswlib
import numpy as np 
import struct
import heapq

def convertBytes(b0, b1, b2, b3, type='int'):
    if(type=='int'):
        return ((b3 & 0xff)<<24) | ((b2 & 0xff)<<16) | ((b1 & 0xff)<<8) | (b0 & 0xff)
    else:
        int8_str = hex (((b3 & 0xff)<<24) | ((b2 & 0xff)<<16) | ((b1 & 0xff)<<8) | (b0 & 0xff))
        hex2bin = struct.pack('<I', int(int8_str, 0))
        bin2float = float(struct.unpack('<f', hex2bin)[0])
        return bin2float

def extractLayer0(data_level0):
    #data_level0 = index_dict["data_level0"]
    size = len(data_level0)
    links_count_l0 = []
    links_l0 = [] 
    data_l0 = []
    label_l0 = []
    label_l0_new = []

    for i in range(size//4):
        b0 = data_level0[4*i]
        b1 = data_level0[4*i+1]
        b2 = data_level0[4*i+2]
        b3 = data_level0[4*i+3]
        j = i % 163
        if (j==0):
            links_count_l0.append(convertBytes(b0, b1, b2, b3, type='int'))
        elif (j >= 1 and j <= 32):
            links_l0.append(convertBytes(b0, b1, b2, b3, type='int'))  
        elif (j >= 33 and j <= 160):
            data_l0.append(convertBytes(b0, b1, b2, b3, type='float'))
        else:
            label_l0.append(convertBytes(b0, b1, b2, b3, type='int'))
    
    for i in range(size//652):
        label_0 = label_l0[2*i]
        label_1 = label_l0[2*i+1]
        label_l0_new.append((label_1<<32 | label_0))
    
    return links_count_l0, links_l0, data_l0, label_l0_new

def extractLinks(element_levels, links_list):
    links = []
    count_links = 0
    size = len(element_levels)
    for i in range(size):
        links_tmp = []
        if (element_levels[i]==0):
            links.append(links_tmp)
        else:
            for j in range(element_levels[i]):
                for k in range(17):
                    b0 = links_list[count_links + 4*k]
                    b1 = links_list[count_links + 4*k+1]
                    b2 = links_list[count_links + 4*k+2]
                    b3 = links_list[count_links + 4*k+3]
                    links_tmp.append(convertBytes(b0, b1, b2, b3, type='int'))  
                count_links += 17 * 4 # 1 for links number, 16 for links
            links.append(links_tmp)
    return links
    
def calculateDist(query_data, currObj, data_l0):
    index = currObj * 128
    currObj_data = data_l0[index:(index+128)]
    query_data = np.array(query_data)
    currObj_data = np.array(currObj_data)
    # return l2 distance between two points
    return np.sum(np.power((query_data-currObj_data), 2))

In [2]:
def searchKnn(q_data, k, ep_node, ef, num_elements, max_level, links_count_l0, links_l0, data_l0, links, label_l0_new):
    currObj = ep_node
    curdist = calculateDist(q_data, currObj, data_l0)
    #for layer (1, max_level), find one closest node
    for level in reversed(range(1, max_level+1)):
        print("")
        print("level: ", level)
        changed = True
        while changed:
            print("current object: ", currObj, ", current distance: ", curdist)
            changed = False
            if (len(links[currObj])==0):
                break
            else:
                start_index = (level-1) * 17
                size = links[currObj][start_index]
                print("size of neighbors: ", size) 
                neighbors = links[currObj][(start_index+1):(start_index+17)]
                for i in range(size):
                    cand = neighbors[i]
                    dist = calculateDist(q_data, cand, data_l0)
                    print("cand: ", cand, ", dist: ", dist)
                    if (dist < curdist):
                        curdist = dist
                        currObj = cand
                        changed = True
                        print("changed")
                print("one node finish")
                print("")

    print("")
    print("level: 0")
    # search in ground layer
    visited_array = np.zeros(num_elements, dtype=int) # default 0
    visited_array_tag = 1
    top_candidates = []
    candidate_set = []
    lowerBound = curdist 
    heapq.heappush(top_candidates, (-curdist, currObj))
    heapq.heappush(candidate_set,(curdist, currObj))
    visited_array[currObj] = visited_array_tag

    while len(candidate_set)!=0:
        current_node_pair = candidate_set[0]
        if ((current_node_pair[0] > lowerBound)):
            break
        heapq.heappop(candidate_set)
        current_node_id = current_node_pair[1]
        print("current object: ", current_node_id)
        size = links_count_l0[current_node_id]
        print("size of neighbors: ", size)
        for i in range(size):
            candidate_id = links_l0[32*current_node_id + i]
            if (visited_array[candidate_id]!=visited_array_tag):
                visited_array[candidate_id] = visited_array_tag
                #currObj1 = data_l0[(128*candidate_id):(128*candidate_id+128)]
                dist = calculateDist(q_data, candidate_id, data_l0)
                print("current object: ", candidate_id, ", current distance: ", dist, ", lowerBound: ", lowerBound)
                if (len(top_candidates) < ef or lowerBound > dist):
                    print("added")
                    heapq.heappush(candidate_set, (dist, candidate_id))
                    heapq.heappush(top_candidates, (-dist, candidate_id))
                if (len(top_candidates) > ef):
                    heapq.heappop(top_candidates)
                if (len(top_candidates)!=0):
                    lowerBound = -top_candidates[0][0]
            else :
                print("current object: ", candidate_id, ", visited already")
        print("one node finishes")
        print("")
    
    while len(top_candidates) > k:
        heapq.heappop(top_candidates)

    result = []
    while len(top_candidates) > 0:
        candidate_pair = top_candidates[0]
        result.append([-candidate_pair[0], label_l0_new[candidate_pair[1]]])
        heapq.heappop(top_candidates)

    return result



In [4]:
dim = 128
num_elements = 1048576
# index_path = 'index.bin'
index_path = '/mnt/scratch/wenqi/hnswlib-eval/indexes_subgraph_kmeans/SIFT10M_10_subgraphs/subgraph_0.bin'

p = hnswlib.Index(space = 'l2', dim = dim)
p.load_index(index_path)
index = p.annData()
index_dict = index[0]

# ef = index_dict["ef"]
ef = 16
max_level = index_dict["max_level"]
data_level0 = index_dict["data_level0"]
enterpoint_node = index_dict["enterpoint_node"]
element_levels = index_dict["element_levels"]
links_list = index_dict["link_lists"]

# data and links in layer0
links_count_l0, links_l0, data_l0, label_l0_new = extractLayer0(data_level0)
# links in other layers
links = extractLinks(element_levels, links_list)

# np.random.seed(1)
# qdata = np.float32(np.random.random((1, dim)))
qdata = [0.0123696, 0.472579,  0.586607, 0.15996, 0.337809, 0.380651, 0.351833, 0.59557, 0.523551, 0.501098, 0.267685, 0.802518, 0.977961, 0.767567, 0.0956629, 0.97753,
        0.67074,   0.0839634, 0.969068, 0.617426, 0.249031, 0.224536, 0.519282, 0.943601, 0.0505886, 0.452829, 0.0905339, 0.413765, 0.136365, 0.8277, 0.309194, 0.766323,
        0.113263,  0.960899,  0.560823, 0.236987, 0.802165, 0.577352, 0.242747, 0.661184, 0.63294, 0.961649, 0.718534, 0.664026, 0.311829, 0.776273, 0.0870592, 0.117126,
        0.180718,  0.606929,  0.191287, 0.000166441, 0.582858, 0.28387, 0.638824, 0.479961, 0.86328, 0.0235504, 0.567014, 0.474131, 0.67512, 0.569185, 0.95862, 0.251585,
        0.673246,  0.793391,  0.524749, 0.280569, 0.351585, 0.454706, 0.108521, 0.318753, 0.0697084, 0.45125, 0.238437, 0.567582, 0.865705, 0.198373, 0.380745, 0.131855,
        0.486811,  0.468224,  0.479084, 0.987974, 0.023348, 0.34968, 0.654999, 0.194506, 0.716907, 0.127722, 0.450073, 0.668612, 0.116613, 0.918388, 0.353905, 0.878461,
        0.728848,  0.174207,  0.926245, 0.315212, 0.489977, 0.718842, 0.928661, 0.315497, 0.875097, 0.146088, 0.881823, 0.80182, 0.327934, 0.288367, 0.751575, 0.323576,
        0.746039,  0.658331,  0.597801, 0.282286, 0.643329, 0.518495, 0.027351, 0.911921, 0.488524, 0.877867, 0.168668, 0.783541, 0.954007, 0.337684, 0.604556, 0.0190392]
qdata = np.asarray(qdata, dtype=float)
qdata = np.reshape(qdata, (-1, 128))

result = searchKnn(qdata, 1, enterpoint_node, ef, num_elements, max_level, links_count_l0, links_l0, data_l0, links, label_l0_new)

AttributeError: 'hnswlib.Index' object has no attribute 'annData'

In [4]:
print(links_l0[(2*32):(2*32+32)])

[1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 25, 30, 32, 35, 37, 39, 43, 62, 81, 127, 128, 134, 149, 173, 181]


In [5]:
result

[[0.07357056635117745, 114]]