In [5]:
import numpy as np
import faiss

#### Indexing

In [12]:
# Toy dataset: four 3-dimensional float32 vectors.
X = np.array([
  [1.0,  2.0,  3.0],
  [1.5,  1.8,  3.2],
  [8.0,  9.0, 10.0],
  [8.5,  9.2, 10.1]
], dtype=np.float32)

# Shape and size of the raw data (12 float32 values * 4 bytes = 48 bytes)
print(f"Shape of X: {X.shape} and {X.nbytes} in Bytes")


# Define the hyperparameters
d = X.shape[1]  # vector dimension

# Scalar-quantizer index: each dimension is stored with 8 bits and
# searches use the L2 metric.
index = faiss.IndexScalarQuantizer(
    d,
    faiss.ScalarQuantizer.QT_8bit,  # 8-bit per dimension
    faiss.METRIC_L2
)

# Fit the quantizer on the data, then store the (quantized) vectors.
index.train(X)
index.add(X)

# Decode every stored vector back to float32 in a single batched call.
# This replaces a per-vector Python loop + np.vstack, and it also returns
# an empty (0, d) array instead of raising when the index holds no vectors.
X_recon = index.reconstruct_n(0, index.ntotal)

# Compare the originals with their decoded counterparts; the difference is
# the error introduced by quantization.
print("Original:\n", X)
print("Reconstructed:\n", X_recon)
print("Error:\n", X - X_recon)

Shape of X: (4, 3) and 48 in Bytes
Original:
 [[ 1.   2.   3. ]
 [ 1.5  1.8  3.2]
 [ 8.   9.  10. ]
 [ 8.5  9.2 10.1]]
Reconstructed:
 [[ 1.0147059  1.9886274  3.0139215]
 [ 1.5147059  1.8145097  3.2088234]
 [ 8.014706   9.011372  10.002549 ]
 [ 8.514706   9.214509  10.113922 ]]
Error:
 [[-0.0147059   0.01137257 -0.0139215 ]
 [-0.0147059  -0.0145098  -0.00882339]
 [-0.01470566 -0.01137161 -0.00254917]
 [-0.01470566 -0.0145092  -0.01392174]]


#### Retrieval

In [14]:
# One query vector, shaped as a batch of size 1 and stored as float32
# (the same dtype as the indexed data).
query = np.asarray([[1.2, 2.1, 3.1]], dtype=np.float32)

# Ask for the 4 nearest neighbours; search() returns the distances first
# and the matching row ids second, one row per query vector.
distances, indices = index.search(query, 4)

# Show the neighbour ids, then their distances, one array per line.
print(indices, distances, sep="\n")

[[0 1 2 3]]
[[5.4147240e-02 1.9238697e-01 1.4185246e+02 1.5331627e+02]]


In [19]:
#### Manual Distance Computation from Reconstructed Vectors
# Squared L2 distance from the query to each reconstructed vector, computed
# directly as a sum of squared differences. This avoids taking a square root
# (np.linalg.norm) only to square it again, which costs an extra rounding step.
manual_sq_dists = np.sum((X_recon - query) ** 2, axis=1)

# The search results should equal the squared distances to the *reconstructed*
# (not the original) vectors; fail loudly if they ever diverge.
np.testing.assert_allclose(manual_sq_dists, distances[0], rtol=1e-5)

manual_sq_dists

array([5.4147240e-02, 1.9238698e-01, 1.4185246e+02, 1.5331627e+02],
      dtype=float32)