In [1]:
import os
import sys

import numpy as np
import faiss                   # make faiss available
# Sizes shared by every cell below.
d, nb, nq = 64, 100_000, 10_000   # vector dimension, database size, number of queries
nlist, k = 100, 4                 # IVF list count handed to the index, neighbours per query

np.random.seed(1234)              # fixed seed so the synthetic data is reproducible


def _uniform_with_ramp(count, dim):
    """Return a (count, dim) float32 array of uniform samples whose first column
    additionally carries a ramp of row_number / 1000."""
    vectors = np.random.random((count, dim)).astype('float32')
    vectors[:, 0] += np.arange(count) / 1000.
    return vectors


# Order matters: the database is drawn before the queries from the seeded stream.
xb = _uniform_with_ramp(nb, d)
xq = _uniform_with_ramp(nq, d)

## [Faster Search](https://github.com/facebookresearch/faiss/wiki/Faster-search)

In [6]:
# Build an IVF index: a flat L2 index (`quantizer`) is handed to IndexIVFFlat
# together with the dimension `d` and the list count `nlist`.
quantizer = faiss.IndexFlatL2(d)  # the other index
index = faiss.IndexIVFFlat(quantizer, d, nlist)
# The index starts untrained; train() on the database vectors flips is_trained.
assert not index.is_trained
index.train(xb)
assert index.is_trained

index.add(xb)                  # add may be a bit slower as well
D, I = index.search(xq, k)     # actual search
print(I[-5:])                  # neighbors of the 5 last queries
# Set only after the search above, so it affects searches run in later cells.
index.nprobe = 10              # default nprobe is 1, try a few more


[[ 9900  9309  9810 10048]
 [11055 10895 10812 11321]
 [11353 10164  9787 10719]
 [10571 10664 10632 10203]
 [ 9628  9554  9582 10304]]


In [7]:
%%time
# Same query as the previous cell, repeated after nprobe was raised to 10 there.
D, I = index.search(xq, k)
print(I[-5:])                  # neighbors of the 5 last queries

[[ 9900 10500  9309  9831]
 [11055 10895 10812 11321]
 [11353 11103 10164  9787]
 [10571 10664 10632  9638]
 [ 9628  9554 10036  9582]]
CPU times: user 2.54 s, sys: 414 ms, total: 2.96 s
Wall time: 648 ms


## [Memory Efficient](https://github.com/facebookresearch/faiss/wiki/Lower-memory-footprint#simplifying-index-construction)

In [17]:
%%time
index = faiss.index_factory(d, "IVF100,PQ8")
assert not index.is_trained
index.train(xb)
assert index.is_trained

CPU times: user 7min 24s, sys: 4min 24s, total: 11min 49s
Wall time: 2min 3s


In [21]:
# Populate the trained index with the database vectors.
index.add(xb)

In [22]:
# Same nprobe value as used for the IVFFlat index earlier in the notebook.
index.nprobe = 10

In [23]:
%%time
# Query with the first five database vectors; the printed output below shows
# each one coming back as its own first neighbour (ids 0..4 in column 0).
D, I = index.search(xb[:5], k) # sanity check
print(I)

[[   0   78  608  714]
 [   1 1063  555  208]
 [   2  304  134  179]
 [   3   64    8   16]
 [   4  288  531  159]]
CPU times: user 1.12 ms, sys: 683 µs, total: 1.8 ms
Wall time: 687 µs


## [Running on GPU](https://github.com/facebookresearch/faiss/wiki/Running-on-GPUs#in-python)

In [10]:
# Resource handle that the following cells pass to faiss.index_cpu_to_gpu.
res = faiss.StandardGpuResources()  # use a single GPU

In [11]:
%%time
index = faiss.index_factory(d, "IVF100,PQ8")
index = faiss.index_cpu_to_gpu(res, 0, index)
assert not index.is_trained
index.train(xb)
assert index.is_trained

CPU times: user 7.25 s, sys: 187 ms, total: 7.44 s
Wall time: 3.07 s


In [12]:
# Populate the trained GPU index with the database vectors.
index.add(xb)

In [13]:
# Confirm the index still has nprobe == 1 before raising it to 10.
assert index.nprobe == 1
index.nprobe = 10

In [17]:
%%time
# Query with the first five database vectors; the printed output below shows
# each one coming back as its own first neighbour (ids 0..4 in column 0).
D, I = index.search(xb[:5], k) # sanity check
print(I)

[[   0   78  228   85]
 [   1  555 1063  277]
 [   2  304  134   33]
 [   3  139   16 1057]
 [   4  288    8  531]]
CPU times: user 517 µs, sys: 54 µs, total: 571 µs
Wall time: 567 µs


## [10M vectors index training on GPU](https://gist.github.com/mdouze/46d6bbbaabca0b9778fca37ed2bcccf6)

In [18]:
# get some training data
# Requests a (1000000, 128) matrix from faiss.rand.
# NOTE(review): the section heading says 10M vectors, but this creates
# 1,000,000 rows — confirm which is intended.
xt = faiss.rand((1000000, 128))

In [19]:
# Factory string for 128-d input: PCA to 64 dims, IVF with 16384 lists whose
# coarse quantizer is specified as HNSW32, and Flat (uncompressed) storage.
index = faiss.index_factory(128, "PCA64,IVF16384_HNSW32,Flat")

In [20]:
# Pull the IVF sub-index out of the composite index and give it a flat L2
# index, copied to all GPUs, as its `clustering_index`. The flat index is built
# with the IVF sub-index's own dimension (`index_ivf.d`).
index_ivf = faiss.extract_index_ivf(index)
clustering_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(index_ivf.d))
index_ivf.clustering_index = clustering_index

In [21]:
%%time
# training with GPU
# Trains the composite index after the previous cell attached the GPU clustering index.
index.train(xt)

CPU times: user 11.5 s, sys: 2.81 s, total: 14.3 s
Wall time: 7.06 s


In [28]:
# Add the same vectors that were used for training, then report how many the
# index now holds.
index.add(xt)
print(index.ntotal)

1000000


In [33]:
# Query with the first five indexed vectors and show both neighbour ids and
# distances; in the recorded output each vector is its own first neighbour at
# a distance of ~1e-12.
%time D, I = index.search(xt[:5], k) # sanity check
print(I)
print(D)

CPU times: user 598 µs, sys: 47 µs, total: 645 µs
Wall time: 377 µs
[[     0  78792 296224 338379]
 [     1 528663 428339 361233]
 [     2 764229 168178 898249]
 [     3 428937 144700 218694]
 [     4 278144 464718 655490]]
[[8.2384100e-13 5.3181753e+00 5.6431532e+00 5.7314663e+00]
 [1.1174395e-12 4.9125924e+00 5.0084486e+00 5.1111469e+00]
 [4.5996540e-13 4.7565913e+00 4.8978391e+00 5.1389451e+00]
 [1.3641865e-12 5.1118498e+00 5.2657881e+00 5.6154070e+00]
 [7.5450757e-13 5.5794878e+00 6.0319285e+00 6.4769058e+00]]


## 10M vectors index on GPU

In [42]:
# get some training data
# Requests a fresh (1000000, 128) matrix from faiss.rand, replacing the earlier `xt`.
# NOTE(review): the section heading says 10M vectors, but this creates
# 1,000,000 rows — confirm which is intended.
xt = faiss.rand((1000000, 128))

In [43]:
# CPU-side composite index plus the GPU resource handle; the next cell passes
# both to faiss.index_cpu_to_gpu.
index = faiss.index_factory(128, "PCA64,IVF16384_HNSW32,Flat")
res = faiss.StandardGpuResources()  # use a single GPU

In [44]:
%%time
# Move the index built in the previous cell to GPU 0 and train it there.
# NOTE(review): `index` is rebound to its own GPU copy, so this cell is not
# idempotent — re-run the previous cell first if it needs to be executed again.
index = faiss.index_cpu_to_gpu(res, 0, index)
assert not index.is_trained
index.train(xt)
assert index.is_trained

CPU times: user 15.1 s, sys: 4.08 s, total: 19.2 s
Wall time: 13.8 s


In [None]:
# Attempt: OPQ16_64 pre-transform + IVF16384_HNSW32 + PQ16 codes, moved to GPU 0
# before training.
index = faiss.index_factory(128, "OPQ16_64,IVF16384_HNSW32,PQ16")
res = faiss.StandardGpuResources()  # use a single GPU
index = faiss.index_cpu_to_gpu(res, 0, index)
assert not index.is_trained
%time index.train(xt)
assert index.is_trained

# NOTE: this cell has no recorded execution; per the author it does not run
# with the GPU / CUDA version that was available.

In [None]:
# Attempt: OPQ16_64 pre-transform + IVF16384_HNSW32 + Flat storage, moved to
# GPU 0 before training.
index = faiss.index_factory(128, "OPQ16_64,IVF16384_HNSW32,Flat")
res = faiss.StandardGpuResources()  # use a single GPU
index = faiss.index_cpu_to_gpu(res, 0, index)
assert not index.is_trained
%time index.train(xt)
assert index.is_trained

# NOTE: this cell has no recorded execution; per the author it does not run
# with the GPU / CUDA version that was available.

In [None]:
# Attempt: keep the OPQ16_64 + IVF16384_HNSW32 + Flat index on the CPU and only
# hand the IVF sub-index a GPU-backed flat L2 index as its clustering index.
index = faiss.index_factory(128, "OPQ16_64,IVF16384_HNSW32,Flat")

index_ivf = faiss.extract_index_ivf(index)
clustering_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(index_ivf.d))
index_ivf.clustering_index = clustering_index

assert not index.is_trained
%time index.train(xt)
assert index.is_trained

# NOTE: this cell has no recorded execution; per the author it does not run
# with the GPU / CUDA version that was available.

In [63]:
# PCA64 + IVF16384_HNSW32 with the "PQ16x4fsr" encoding; the index itself stays
# on the CPU and only the IVF sub-index's clustering index is GPU-backed.
index = faiss.index_factory(128, "PCA64,IVF16384_HNSW32,PQ16x4fsr")

index_ivf = faiss.extract_index_ivf(index)
clustering_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(index_ivf.d))
index_ivf.clustering_index = clustering_index

# Train and time it; the asserts bracket the untrained -> trained transition.
assert not index.is_trained
%time index.train(xt)
assert index.is_trained


CPU times: user 21 s, sys: 3.75 s, total: 24.7 s
Wall time: 14.7 s


In [69]:
# Same setup as the previous cell, but with the "PQ32x4fsr" encoding instead of
# "PQ16x4fsr"; again only the clustering index is GPU-backed.
index = faiss.index_factory(128, "PCA64,IVF16384_HNSW32,PQ32x4fsr")

index_ivf = faiss.extract_index_ivf(index)
clustering_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(index_ivf.d))
index_ivf.clustering_index = clustering_index

# Train and time it; the asserts bracket the untrained -> trained transition.
assert not index.is_trained
%time index.train(xt)
assert index.is_trained


CPU times: user 22 s, sys: 3.66 s, total: 25.7 s
Wall time: 15 s


In [99]:
# PCA64 + IVF16384_HNSW32 + PQ16, built on the CPU, moved to GPU 0 and trained
# there. The index is created and converted in the same cell, so the cell can
# be re-run on its own.
index = faiss.index_factory(128, "PCA64,IVF16384_HNSW32,PQ16")
res = faiss.StandardGpuResources()  # use a single GPU
index = faiss.index_cpu_to_gpu(res, 0, index)
assert not index.is_trained
%time index.train(xt)
assert index.is_trained

CPU times: user 25.2 s, sys: 3.76 s, total: 29 s
Wall time: 17 s


In [100]:
# Add the training vectors to the trained index and report how many it holds.
index.add(xt)
print(index.ntotal)

1000000


In [104]:
%timeit -r 3 -n 1000 D, I = index.search(xt[:5], k) # sanity check
print(I)
print(D)

285 µs ± 55.5 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)
[[     0  29968 621621 296224]
 [     1 361233 528663  18251]
 [     2 159180 168178 448653]
 [     3 552619 428937 144700]
 [     4 278144 464718 928596]]
[[0.4225589  4.052941   5.045701   5.2467833 ]
 [0.3379284  4.6040754  4.859063   5.081909  ]
 [0.26284543 4.511919   4.5418663  4.651175  ]
 [0.3073234  4.9794188  5.1886125  5.3649845 ]
 [0.4272653  5.4172373  5.7246256  6.134384  ]]


## [Index IO](https://github.com/facebookresearch/faiss/wiki/Index-IO,-cloning-and-hyper-parameter-tuning#io-and-deep-copying-indexes)

In [86]:
# # check if the index is on the GPU
# INDEX_ON_GPU = isinstance(index, (faiss.GpuIndex, faiss.GpuResources))
# print(f"Index on GPU? {INDEX_ON_GPU}")

# may use
# try:
#     faiss.index_cpu_to_gpu(res, 0, index)

In [105]:
# Persist the index, then report the size of the resulting file.
file_path = "/tmp/index.faiss"
try:
    # First attempt: serialise the index exactly as it is.
    faiss.write_index(index, file_path)
except RuntimeError as e:
    # Fallback: assume the failure is due to a GPU index, convert it to a CPU
    # index (rebinding `index`) and write again to the same path.
    print("probably GPU index. Will try to move to CPU and save again")
    print(f"Original error: {e}")
    index = faiss.index_gpu_to_cpu(index)
    faiss.write_index(index, file_path)

# Bytes on disk, scaled by 1024 twice to get mebibytes.
size_in_bytes = os.stat(file_path).st_size
size_in_MB = size_in_bytes / 1024 / 1024
print(f"Size of the index: {size_in_MB} MB")

probably GPU index. Will try to move to CPU and save again
Original error: Error in void faiss::write_index(const faiss::Index*, faiss::IOWriter*) at /home/conda/feedstock_root/build_artifacts/faiss-split_1644327786058/work/faiss/impl/index_write.cpp:590: don't know how to serialize this type of index
Size of the index: 27.170928955078125 MB


In [112]:
# test loading
# Replace `index` with the copy read back from disk and confirm the vector
# count matches what was added before saving.
index = faiss.read_index(file_path)
print(index.ntotal)

1000000


In [113]:
%timeit -r 3 -n 1000 D, I = index.search(xt[:5], k) # sanity check
print(I)
print(D)

976 µs ± 125 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)
[[     0  29968 472524 645664]
 [     1  47593 504210 910686]
 [     2 675420 159396 963202]
 [     3 625427 552619 428937]
 [     4 307978 560944 121648]]
[[0.4225589  4.052941   4.2326136  4.3569345 ]
 [0.3379284  3.4400072  4.4045725  4.4127164 ]
 [0.26284543 3.7331626  3.787204   3.8115711 ]
 [0.3073234  4.755132   4.9794188  5.1886125 ]
 [0.4272653  4.919924   4.93617    5.000271  ]]


In [114]:
# Original nprobe
# `index.index` is the sub-index held by the top-level index; it is downcast
# before its `nprobe` attribute is read.
print(f"Original nprobe: {faiss.downcast_index(index.index).nprobe}")
# unwind the index to find the nprobe and nlist
index_ivf = faiss.extract_index_ivf(index)
print(f"nprobe: {index_ivf.nprobe}")
print(f"nlist: {index_ivf.nlist}")
# Setting nprobe through the extracted IVF handle...
index_ivf.nprobe = 128
# check the nprobe has been changed
# ...is visible through the top-level index too (the recorded output prints 128).
print(f"nprobe set to: {faiss.downcast_index(index.index).nprobe}")

Original nprobe: 1
nprobe: 1
nlist: 16384
nprobe set to: 128


In [115]:
%timeit -r 3 -n 1000 D, I = index.search(xt[:5], k) # sanity check
print(I)
print(D)

2.26 ms ± 523 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)
[[     0  29968 472524 645664]
 [     1  47593 504210 910686]
 [     2 675420 159396 963202]
 [     3 625427 552619 428937]
 [     4 307978 560944 121648]]
[[0.4225589  4.052941   4.2326136  4.3569345 ]
 [0.3379284  3.4400072  4.4045725  4.4127164 ]
 [0.26284543 3.7331626  3.787204   3.8115711 ]
 [0.3073234  4.755132   4.9794188  5.1886125 ]
 [0.4272653  4.919924   4.93617    5.000271  ]]


In [116]:
index = faiss.index_cpu_to_gpu(res, 0, index)
%timeit -r 3 -n 1000 D, I = index.search(xt[:5], k) # sanity check
print(I)
print(D)

671 µs ± 55.8 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)
[[     0  29968 472524 645664]
 [     1  47593 504210 910686]
 [     2 675420 159396 963202]
 [     3 625427 552619 428937]
 [     4 307978 560944 121648]]
[[0.4225589  4.052941   4.2326136  4.3569345 ]
 [0.3379284  3.4400072  4.4045725  4.4127164 ]
 [0.26284543 3.7331626  3.787204   3.8115711 ]
 [0.3073234  4.755132   4.9794188  5.1886125 ]
 [0.4272653  4.919924   4.93617    5.000271  ]]
