1.1 Configure environment dependencies (recommended in the Docker container created by the Ubuntu image)
sudo apt update && sudo apt install -y git cmake g++ python3 python3-setuptools python3-pip libblas-dev liblapack-dev
pip3 install wheel pybind11==2.5.0
git clone https://github.com/WPJiang/HWTL_SDU-ANNS.git
sudo cp HWTL_SDU-ANNS/lib/* /usr/local/lib/
sudo cp HWTL_SDU-ANNS/bin/* /usr/local/bin/
sudo chmod a+x /usr/local/bin/* && chmod a+x HWTL_SDU-ANNS/*
ldconfig
pip3 install HWTL_SDU-ANNS/qsgngt-*-linux_x86_64.whl
python3 qsgngt.py
n, d = 10000, 128
X = np.random.randn(n, d)
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
X = fvecs_read("sift_base.fvecs")
import subprocess
import ngtpy
args = [
"ngt",
"create",
"-it",
"-p8",
"-b500",
"-ga",
"-of",
"-D" + self._metric,
"-d" + str(dim),
"-E" + str(self._edge_size),
"-S40",
"-e" + str(self._epsilon),
"-P0",
"-B30",
"-T4",
"-R" + str(self._range),
"-t" + str(self._threshold),
"-M" + str(self._rangeMax),
"-A" + str(self._searchA),
"-H" + str(self._ifES),
anngIndex,
]
subprocess.call(args)
idx = ngtpy.Index(path=anngIndex)
idx.batch_insert(X, num_threads=24, debug=False)
idx.save()
idx.close()
After reading the data set into the variable X, the rest of the operation is the same as randomly generating the data
from sklearn import preprocessing
if self._metric == "E":
X_normalized = preprocessing.normalize(X, norm="l2")
fvecs_dir = "fvecs"
if not os.path.exists(fvecs_dir):
os.makedirs(fvecs_dir)
fvecs = os.path.join(fvecs_dir, "base.fvecs")
with open(fvecs, "wb") as fp:
for y in X_normalized:
d = struct.pack("I", y.size)
fp.write(d)
for x in y:
a = struct.pack("f", x)
fp.write(a)
else:
fvecs_dir = "fvecs"
if not os.path.exists(fvecs_dir):
os.makedirs(fvecs_dir)
fvecs = os.path.join(fvecs_dir, "base.fvecs")
with open(fvecs, "wb") as fp:
for y in X:
d = struct.pack("I", y.size)
fp.write(d)
for x in y:
a = struct.pack("f", x)
fp.write(a)
Build AKNNG using os.system or the command line (where fvecs is where the data set is stored, KNNG is where the KNNG is stored, and SG is where the SpaceGraph is stored)
cmds = (
"/home/app/HWTL_SDU-ANNS/qsgngt-knng "
+ str(fvecs)
+ " "
+ str(KNNG)
+ " "
+ K
+ " "
+ L
+ " "
+ iter
+ " "
+ S
+ " "
+ R
+ "&& /home/app/HWTL_SDU-ANNS/qsgngt-SpaceGraph "
+ str(fvecs)
+ " "
+ str(KNNG)
+ " "
+ SL
+ " "
+ SR
+ " "
+ SAngle
+ " "
+ str(SG)
)
os.system(cmds)
For the angular distance data set, you need to import it for normalization processing, and then run the os.system or command line to invoke qsgngt-knng and qsgngt-SpaceGraph
Dataset | metirc | dim | edge_size | epsilon | range | threshold | rangeMax | searchA | ifES |
---|---|---|---|---|---|---|---|---|---|
SIFT1M | 2 | 128 | 100 | 0.08 | 200 | 60 | 200 | 400 | 0 |
Dataset | K | L | iter | S | R | SL | SR | SAngle |
---|---|---|---|---|---|---|---|---|
SIFT1M | 400 | 400 | 12 | 10 | 100 | 100 | 100 | 60 |
args = [
"ngt",
"reconstruct-graph",
"-mS",
"-E " + str(outdegree),
"-o " + str(outdegree),
"-i " + str(indegree),
anngIndex,
index,
]
subprocess.call(args)
Dataset | outdegree | indegree |
---|---|---|
SIFT1M | 64 | 120 |
args = ["qbg", "create-qg", index]
subprocess.call(args)
args = [
"qbg",
"build-qg",
"-o" + str(sample),
"-M6",
"-ib",
"-I400",
"-Gz",
"-Pn",
"-E" + str(max_edge_size),
index,
]
subprocess.call(args)
Dataset | sample | max_edge_size |
---|---|---|
SIFT1M | 4000 | 96 |
self.index = ngtpy.QuantizedIndex(index, self._max_edge_size)
self.index.set_with_distance(False)
self.indexName = index
self.index.set(epsilon=epsilon, result_expansion=result_expansion)
Taking the SIFT1M dataset as an example, the sample parameters are as follows(Including the corresponding QPS and Recall@10)
Dataset | epsilon | result_expansion | QPS | Recall@10 |
---|---|---|---|---|
SIFT1M | 1.04 | 2.00 | 9902 | 0.9919 |
n, d = 1000, 128
Y = np.random.randn(n, d)
def ivecs_read(fname):
a = np.fromfile(fname, dtype='int32')
d = a[0]
return a.reshape(-1, d + 1)[:, 1:].copy()
def fvecs_read(fname):
return ivecs_read(fname).view('float32')
Y = fvecs_read("sift_base.fvecs")
for v in Y:
self.index.search(v, n)
The team members come from HWTL and SDU.
HWTL: Weipeng Jiang, Wei Han, Bo Bai
SDU: Kun Wang, Zixu Li, Zhiwei Chen, Shanzhi Li, Yupeng Hu, Liqiang Nie