In [1]:
from __future__ import print_function
import numpy as np

# Synthetic dataset sizes.
d = 64          # vector dimension
nb = 1000000    # number of database vectors
nq = 10000      # number of query vectors


def _make_vectors(count, dim):
    """Draw `count` random float32 vectors of length `dim` from numpy's global RNG.

    Values are uniform in [0, 1); the first component of row i is then
    shifted by i / 1000, so it grows with the row number.
    """
    vectors = np.random.random((count, dim)).astype('float32')
    vectors[:, 0] += np.arange(count) / 1000.
    return vectors


# Fixed seed so every run draws the same data. The database is drawn
# before the queries; the order matters because both share the global RNG.
np.random.seed(1234)
xb = _make_vectors(nb, d)
xq = _make_vectors(nq, d)

import faiss

# IVFPQ configuration (parameter meanings as given in the Faiss documentation).
nlist = 100   # number of inverted lists built on top of the quantizer
m = 8         # number of sub-vectors each vector is split into
nbits = 8     # each sub-vector is encoded as 8 bits
k = 4         # number of neighbours requested per query

# A flat L2 index is handed to IndexIVFPQ as its quantizer.
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, nbits)

# Train on the database vectors, then add those same vectors to the index.
index.train(xb)
index.add(xb)

# Sanity check: query with the first five database vectors. This runs
# before nprobe is touched, i.e. with the index's initial nprobe.
first_five = xb[:5]
D, I = index.search(first_five, k)
print(I)
print(D)

# Raise nprobe to 10 for the real query set (Faiss: nprobe is the number
# of inverted lists visited per query), then show the last five result rows.
index.nprobe = 10
D, I = index.search(xq, k)
print(I[-5:])

Failed to load GPU Faiss: No module named swigfaiss_gpu
Faiss falling back to CPU-only.


[[   0  491  424  584]
 [   1 1063  617  191]
 [   2  304  179  346]
 [   3  484  162  491]
 [   4  288  600  197]]
[[1.5985124 5.5621166 5.7146072 6.0769935]
 [1.7909644 5.721672  6.050125  6.411626 ]
 [1.667192  5.6445165 6.279294  6.7285028]
 [1.8440818 6.3907676 6.543792  6.7020473]
 [1.3459432 5.8926816 6.3004694 6.3125706]]
[[ 9864  9510  9636 10520]
 [10531 10047 10203  9762]
 [ 9229  9469  9616  9510]
 [ 9787 11000 10584 10765]
 [ 9464  9486  9835 10187]]


In [2]:
# Persist the trained and populated index to a file in the working directory.
index_file = '3-IVFPQ.index'
faiss.write_index(index, index_file)

In [3]:
# Read the index back from the file written earlier; `loaded` is a
# separate object from the in-memory `index`.
index_file = '3-IVFPQ.index'
loaded = faiss.read_index(index_file)

In [4]:
# Sanity check on the reloaded index: query it with the first five
# database vectors and print the neighbour ids followed by the distances.
first_five = xb[:5]
D, I = loaded.search(first_five, k)
print(I)
print(D)

[[   0  491  424  584]
 [   1 1063  617  191]
 [   2  304  179  346]
 [   3  484  162  491]
 [   4  288  600  197]]
[[1.5985124 5.5621166 5.7146072 6.0769935]
 [1.7909644 5.721672  6.050125  6.411626 ]
 [1.667192  5.6445165 6.279294  6.7285028]
 [1.8440818 6.3907676 6.543792  6.7020473]
 [1.3459432 5.8926816 6.3004694 6.3125706]]


In [5]:
# Set nprobe explicitly on the reloaded index so this search uses the same
# setting as the nprobe = 10 query search run on the original index.
loaded.nprobe = 10
D, I = loaded.search(xq, k)

# Show only the result rows of the last five queries.
last_five_ids = I[-5:]
print(last_five_ids)

[[ 9864  9510  9636 10520]
 [10531 10047 10203  9762]
 [ 9229  9469  9616  9510]
 [ 9787 11000 10584 10765]
 [ 9464  9486  9835 10187]]
