## testing with random vectors

In [1]:
import faiss
import numpy as np

In [67]:
# Synthetic data for the random-vector experiment:
#   d  -- dimension of every vector
#   nb -- number of database vectors
#   nq -- number of query vectors
d, nb, nq = 64, 100000, 100

# Fixed seed so every run draws exactly the same vectors.
np.random.seed(1234)

# Database vectors are drawn first, then the queries; both come from the same
# global random stream, so this order must be kept for reproducibility.
xb = np.random.random(size=(nb, d)).astype(np.float32)
# Add i / 1000 to the first coordinate of row i, so that component grows
# steadily with the row index.
xb[:, 0] += np.arange(nb) / 1000.0

# Queries get the same treatment as the database vectors.
xq = np.random.random(size=(nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.0

In [68]:
# Build a flat L2 index over d-dimensional vectors and load the database into it.
# The first print shows the index reports itself as trained before any vectors
# are added; the second shows how many vectors it holds after the add.
index = faiss.IndexFlatL2(d)   # build the index (brute-force L2 distance search)
print(index.is_trained)
index.add(xb)                  # add vectors to the index
print(index.ntotal)

True
100000


In [69]:
# Sanity check: query the index with the first five database vectors themselves.
# Each vector should come back as its own nearest neighbor (ids 0..4 in the
# first column of I) at distance 0 (first column of D).
k = 4                          # we want to see 4 nearest neighbors
D, I = index.search(xb[:5], k) # sanity check; D = distances, I = neighbor ids
print(I)
print(D)

[[  0 393 363  78]
 [  1 555 277 364]
 [  2 304 101  13]
 [  3 173  18 182]
 [  4 288 370 531]]
[[0.        7.1751738 7.20763   7.2511625]
 [0.        6.3235645 6.684581  6.799946 ]
 [0.        5.7964087 6.391736  7.2815123]
 [0.        7.2779055 7.527987  7.6628466]
 [0.        6.7638035 7.2951202 7.3688145]]


In [70]:
# Search the k nearest database vectors for every query in xq.
# This overwrites the D and I produced by the sanity check.
D, I = index.search(xq, k)     # actual search
print(I[:5])                   # neighbors of the first 5 queries
print(I[-5:])                  # neighbors of the last 5 queries

[[ 381  207  210  477]
 [ 526  911  142   72]
 [ 838  527 1290  425]
 [ 196  184  164  359]
 [ 526  377  120  425]]
[[ 801  781  933  385]
 [1073  786 1076  381]
 [ 549  244  100 1008]
 [ 917  140  965   68]
 [ 511  789  225  781]]


In [26]:
# Distances returned by the most recent search: one row per query, nearest first.
# NOTE(review): the recorded output is truncated with "..." and this cell's
# execution count predates the search cell, so it presumably comes from an
# earlier run with more queries -- re-run to refresh.
D

array([[6.815506 , 6.8894653, 7.3956795, 7.4290257],
       [6.6041107, 6.6796913, 6.7209625, 6.828682 ],
       [6.4703865, 6.8578568, 7.0043793, 7.036564 ],
       ...,
       [6.072693 , 6.5767517, 6.6139526, 6.7323   ],
       [6.637512 , 6.6487427, 6.8578796, 7.0096436],
       [6.218384 , 6.4525146, 6.5487823, 6.581299 ]], dtype=float32)

In [31]:
# Shape of the neighbor-id array: one row per query, k columns.
# NOTE(review): the recorded output shows 10000 rows although nq is 100 in the
# setup cell -- presumably stale from an earlier run; re-run to refresh.
I.shape

(10000, 4)

In [32]:
# Shape of the distance array; it matches I (one row per query, k columns).
# NOTE(review): the recorded output shows 10000 rows although nq is 100 in the
# setup cell -- presumably stale from an earlier run; re-run to refresh.
D.shape

(10000, 4)

## with own vectors

In [34]:
# Parameters for the hand-written example in this section.
d = 3      # dimension of each vector
nb = 10    # number of database vectors
nq = 3     # number of query vectors
k = 4      # neighbors returned per query; set here so this section does not
           # rely on a value left over from the random-vector section

In [57]:
# Hand-written toy data, built directly as float32 arrays:
# ten 3-d database vectors and three 3-d query vectors.
xb = np.array(
    [
        [0, 0, 0],
        [1, 2, 3],
        [1, 2, 2],
        [1, 2, 4],
        [3, 4, 5],
        [6, 7, 8],
        [1, 0, 0],
        [4, 2, 1],
        [3, 5, 6],
        [2, 4, 6],
    ],
    dtype=np.float32,
)
xq = np.array(
    [
        [1, 2, 3],
        [0, 0, 1],
        [4, 5, 6],
    ],
    dtype=np.float32,
)

In [58]:
# Build a fresh flat L2 index for the toy vectors. This rebinds `index`,
# replacing the index built in the random-vector section.
index = faiss.IndexFlatL2(d)
print(index.is_trained)
index.add(xb)                  # add the toy database vectors
print(index.ntotal)            # number of vectors now stored in the index

True
10


In [61]:
# Search the toy index. k is not assigned in this cell; it uses the value set
# in an earlier cell.
D, I = index.search(xq, k)     # actual search
print(I[:2])                   # neighbors of the first 2 queries
print(I[-2:])                  # neighbors of the last 2 queries (only 3 queries, so the middle one is printed twice)

[[1 2 3 4]
 [0 6 2 1]]
[[0 6 2 1]
 [8 4 9 5]]


In [62]:
D

array([[ 0.,  1.,  1., 12.],
       [ 1.,  2.,  6.,  9.],
       [ 1.,  3.,  5., 12.]], dtype=float32)

## with simple word embeddings