In [None]:
# FAISS (Facebook AI Similarity Search) is an open-source library from Meta AI
# that you host yourself. It is a powerful tool that gives you full control over
# building and managing vector indexes locally.

In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [None]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

In [None]:
# 1. Toy corpus: four short sentences that will be embedded and indexed.
#    The list position of each sentence is the id FAISS reports back later.
documents = [
    "Kitten care includes frequent feeding, grooming, and vaccinations.",  # 0
    "Electric cars are eco-friendly vehicles powered by batteries.",  # 1
    "The policy on pre-closing a personal loan requires a 2% fee.",  # 2
    "Cats are playful and independent pets.",  # 3
]

In [None]:
# 2. Create embeddings for the documents.
# Load the model by its case-exact published id ("...-v2" with a lowercase "v")
# rather than relying on case-insensitive name resolution.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode every document, then cast explicitly: FAISS requires float32 input.
embeddings = model.encode(documents)
embeddings = embeddings.astype('float32')

In [None]:
# Peek at the embedding matrix as produced by the model
# (NumPy abbreviates the middle columns with "...").
embeddings

array([[-2.0326157e-03,  9.7265569e-05,  6.5966897e-02, ...,
         5.1191133e-03,  8.8277921e-02, -3.6147968e-03],
       [-4.4580633e-03,  1.2786028e-01,  1.2901827e-02, ...,
         8.5954921e-04,  7.6711394e-02,  2.2649251e-02],
       [ 1.0313295e-01,  7.9558501e-03,  8.5745500e-03, ...,
         2.3292959e-02, -5.1684812e-02, -8.2520977e-02],
       [ 9.8080210e-02, -3.8021132e-02,  2.6658099e-02, ...,
         7.4474484e-02,  7.1012355e-02,  4.5340654e-02]], dtype=float32)

In [None]:
# N = number of embedded documents, d = length of each embedding vector.
N, d = np.shape(embeddings)
print(f"Created {N} embeddings with dimension {d}.")


Created 4 embeddings with dimension 384.


In [None]:
# 3. Build the FAISS HNSW index.
# a. L2-normalize the embeddings so that inner product equals cosine similarity.
#    The call modifies `embeddings` in place (its return value is not used), so
#    any earlier display of the array shows the pre-normalization values.
faiss.normalize_L2(embeddings)

In [None]:
# b. Build the HNSW index.
# M is the number of neighbours linked to each node in the graph.
M = 16
index = faiss.IndexHNSWFlat(d, M, faiss.METRIC_INNER_PRODUCT)
# Optional tuning knobs (not set here, so the library defaults apply):
# index.hnsw.efConstruction = xxxx  # build-time search breadth
# index.hnsw.efSearch = xxxxx       # query-time search breadth

In [None]:
# c. Add the vectors to the index.
# `index.add` always appends, so clear the index first; otherwise re-running
# this cell would store every vector a second time.
index.reset()
index.add(embeddings)
assert index.ntotal == len(embeddings), "index size should match the number of embeddings"

In [None]:
# Number of vectors currently stored in the index.
index.ntotal

4

In [None]:
# 4. Run a query against the index.
# A single-topic alternative to try instead:
#   query = "How do I take care of a baby cat?"
# The active query deliberately mixes two topics (cars and cats).
query = (
    "tell me somethng about electric cars? "
    "and tell me somehting about cats?"
)
print(query)

tell me somethng about electric cars? and tell me somehting about cats?


In [None]:
# Embed the query with the same model used for the documents. It is passed as
# a one-element list, then cast to float32 and L2-normalized in place, exactly
# as the document embeddings were.
query_vector = model.encode([query])
query_vector = query_vector.astype('float32')
faiss.normalize_L2(query_vector)

In [None]:
# Number of nearest neighbours to retrieve for the query.
k = 2
# `search` returns two arrays: the scores and the ids of the matching vectors.
distances, indices = index.search(query_vector, k)

In [None]:
# Scores of the retrieved hits. Despite the name these are inner products
# (the index uses METRIC_INNER_PRODUCT on L2-normalized vectors), so a higher
# value means a closer match.
distances

array([[0.60382384, 0.40856588]], dtype=float32)

In [None]:
# Ids of the retrieved hits; they are used below as positions into `documents`.
indices

array([[1, 3]])

In [None]:
# Show the corpus again so the returned ids can be matched to their text.
documents

['Kitten care includes frequent feeding, grooming, and vaccinations.',
 'Electric cars are eco-friendly vehicles powered by batteries.',
 'The policy on pre-closing a personal loan requires a 2% fee.',
 'Cats are playful and independent pets.']

In [None]:
# --- 5. DISPLAY THE RESULTS ---
# Walk the single query's hits in rank order, pairing each id with its score.
print(f"\nTop {k} Results from FAISS:")
for rank, (idx, score) in enumerate(zip(indices[0], distances[0]), start=1):
    if idx < 0:
        # FAISS fills missing results with id -1; indexing `documents` with -1
        # would silently print the last document as if it were a hit.
        continue
    print(f"  - Rank {rank}: (Score: {score:.4f})")
    print(f"    Text: {documents[idx]}")


Top 2 Results from FAISS:
  - Rank 1: (Score: 0.6038)
    Text: Electric cars are eco-friendly vehicles powered by batteries.
  - Rank 2: (Score: 0.4086)
    Text: Cats are playful and independent pets.
