In [None]:
# pip install sentence-transformers faiss-cpu


In [9]:
# ✅ STEP 2: Embedding Text Chunks and Storing in FAISS Vector DB
import os
import json
import pickle
from typing import List

import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

In [10]:
# --- Load the chunked text produced by Step 1 ---
def load_chunks(path: str = "../chunks/chunks.json") -> List[str]:
    """Read the JSON file at ``path`` and return its decoded content.

    Args:
        path: Location of the JSON file; it is opened as UTF-8 text.

    Returns:
        Whatever the JSON document decodes to (annotated as a list of text
        chunks). The number of items is printed before returning.
    """
    with open(path, mode="r", encoding="utf-8") as chunk_file:
        raw_json = chunk_file.read()

    loaded = json.loads(raw_json)
    count = len(loaded)
    print(f"Loaded {count} text chunks.")
    return loaded

# Load the Step 1 output once at notebook scope; `chunks` is reused by the
# embedding cell and the save cell further down.
chunks = load_chunks("../chunks/chunks.json")

Loaded 157 text chunks.


In [11]:
def load_embedding_model(model_name: str = "all-MiniLM-L6-v2"):
    """Instantiate a ``SentenceTransformer`` for ``model_name``.

    Args:
        model_name: Identifier passed unchanged to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance. A confirmation line
        naming the model is printed once construction has succeeded.
    """
    encoder = SentenceTransformer(model_name)
    print(f"Embedding model '{model_name}' loaded.")
    return encoder

# Instantiate the encoder once at notebook scope; `model` is handed to
# generate_embeddings in a later cell.
model = load_embedding_model("all-MiniLM-L6-v2")


Embedding model 'all-MiniLM-L6-v2' loaded.


In [12]:
def generate_embeddings(text_chunks: List[str], model) -> np.ndarray:
    """Encode ``text_chunks`` with ``model`` and return the result.

    Args:
        text_chunks: Texts handed to ``model.encode`` in a single call.
        model: Object exposing ``encode(texts, show_progress_bar=...,
            convert_to_numpy=...)``.

    Returns:
        The value returned by ``model.encode`` (requested as a NumPy array).
        Its ``shape`` is printed before returning.
    """
    # Show a progress bar while encoding and ask for a NumPy result.
    encode_options = {"show_progress_bar": True, "convert_to_numpy": True}
    vectors = model.encode(text_chunks, **encode_options)
    print(f"Generated embeddings with shape: {vectors.shape}")
    return vectors

# Embed every loaded chunk with the model created above; `embeddings` is the
# input to build_faiss_index in the next cell.
embeddings = generate_embeddings(chunks, model)


Batches: 100%|██████████| 5/5 [00:04<00:00,  1.10it/s]

Generated embeddings with shape: (157, 384)





In [13]:
def build_faiss_index(embeddings: np.ndarray):
    """Build a flat L2 FAISS index containing every row of ``embeddings``.

    Args:
        embeddings: 2-D array-like of shape ``(n_vectors, dim)``. It is
            converted to a C-contiguous ``float32`` array before being added;
            an array that already has that layout is used without copying.

    Returns:
        A ``faiss.IndexFlatL2`` of dimension ``dim`` with the rows added in
        order. The number of stored vectors is printed before returning.

    Raises:
        ValueError: If ``embeddings`` is not 2-D or its rows have zero width
            (for example, the result of encoding an empty list of texts).
    """
    # FAISS's Python bindings operate on C-contiguous float32 matrices;
    # normalising here keeps float64 or strided inputs from failing (or being
    # rejected) deep inside the bindings.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Reading shape[1] on a 1-D/empty array would raise a bare IndexError;
    # report the actual problem instead.
    if vectors.ndim != 2 or vectors.shape[1] == 0:
        raise ValueError(
            "embeddings must be a 2-D array of shape (n_vectors, dim) with dim > 0; "
            f"got shape {vectors.shape}"
        )

    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(vectors)
    print(f"FAISS index built with {index.ntotal} vectors.")
    return index
# Index all chunk embeddings; `index` is written to disk by the save cell below.
index = build_faiss_index(embeddings)


FAISS index built with 157 vectors.


In [14]:
def save_faiss_index(index, texts: List[str], path: str = "../vectordb/faiss_index"):
    """Persist a FAISS index and its accompanying texts into directory ``path``.

    Two files are written, creating ``path`` (and parents) if needed:

    * ``index.faiss`` - the index, via ``faiss.write_index``.
    * ``texts.pkl``   - ``texts`` serialised with ``pickle``.

    Args:
        index: FAISS index to save; must expose ``ntotal``.
        texts: Text for each stored vector, in the order the vectors were
            added to ``index``.
        path: Output directory (not a file name).

    Raises:
        ValueError: If ``len(texts)`` differs from ``index.ntotal``. Saving a
            mismatched pair would make later lookups by vector position return
            the wrong (or no) text, so nothing is written in that case.
    """
    # Validate alignment before touching the filesystem.
    if index.ntotal != len(texts):
        raise ValueError(
            f"index holds {index.ntotal} vectors but {len(texts)} texts were given; "
            "they must match one-to-one"
        )

    os.makedirs(path, exist_ok=True)
    faiss.write_index(index, os.path.join(path, "index.faiss"))

    # NOTE(review): the pickle format is kept so the output file stays
    # `texts.pkl`; whoever reads it back must only unpickle files they trust,
    # since pickle.load can execute arbitrary code.
    with open(os.path.join(path, "texts.pkl"), "wb") as f:
        pickle.dump(texts, f)

    print(f"✅ Saved FAISS index and text metadata to {path}/")

# Persist the index together with the chunk texts it was built from, so both
# end up side by side under ../vectordb/faiss_index.
save_faiss_index(index, chunks, "../vectordb/faiss_index")



✅ Saved FAISS index and text metadata to ../vectordb/faiss_index/
