# In [2]:

"""
FAISS Hands-on: Embed & Index Documents

This script demonstrates:
1) Text chunking
2) Embedding with SentenceTransformers
3) Indexing with FAISS
4) Querying the index
5) Saving the index to disk

Requirements:
pip install faiss-cpu sentence-transformers langchain-text-splitters
"""

from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# -----------------------------
# 1. Sample documents
# -----------------------------
# Three short, self-contained sentences used as the demo corpus. Each entry
# is written as adjacent string pieces that Python joins into one string.
documents = [
    (
        "FAISS is a library for efficient similarity search "
        "and clustering of dense vectors."
    ),
    (
        "Retrieval Augmented Generation (RAG) improves LLM responses "
        "by grounding them in retrieved documents."
    ),
    (
        "Embeddings convert text into numerical vectors "
        "that capture semantic meaning."
    ),
]

# -----------------------------
# 2. Chunk documents
# -----------------------------
# The splitter is configured with a chunk size of 200 and an overlap of 20.
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
chunks = splitter.create_documents(documents)

# Only the raw text of each chunk is needed from here on.
texts = [chunk.page_content for chunk in chunks]

# -----------------------------
# 3. Generate embeddings
# -----------------------------
# Encode every chunk with the "all-MiniLM-L6-v2" model, ask for a NumPy
# array, and cast it to float32 before it is handed to FAISS.
model = SentenceTransformer("all-MiniLM-L6-v2")
raw_vectors = model.encode(texts, convert_to_numpy=True)
embeddings = raw_vectors.astype("float32")

# -----------------------------
# 4. Create FAISS index
# -----------------------------
# The index is sized from the second axis of the embedding array, then
# filled with all chunk vectors in one call.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print(f"Total vectors indexed: {index.ntotal}")

# -----------------------------
# 5. Query FAISS index
# -----------------------------
query = "What is FAISS used for?"
# Embed the query exactly like the documents: NumPy output, cast to float32.
query_embedding = model.encode([query], convert_to_numpy=True).astype("float32")

# D holds the distances and I the matching row positions, one row per query.
D, I = index.search(query_embedding, k=2)

print("\nQuery:", query)
print("Top results:")
for idx in I[0]:
    # FAISS fills result slots it could not populate (fewer than k vectors
    # in the index) with the label -1. Indexing texts[-1] would silently
    # print the last chunk as if it were a hit, so skip those slots.
    if idx < 0:
        continue
    print("-", texts[idx])

# -----------------------------
# 6. Save index (optional)
# -----------------------------
# Write the populated index to a file in the current working directory and
# report where it went.
index_path = "faiss_index.bin"
faiss.write_index(index, index_path)
print("\nFAISS index saved as faiss_index.bin")


# Output:
# Total vectors indexed: 3
#
# Query: What is FAISS used for?
# Top results:
# - FAISS is a library for efficient similarity search and clustering of dense vectors.
# - Embeddings convert text into numerical vectors that capture semantic meaning.
#
# FAISS index saved as faiss_index.bin
