In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from tqdm import tqdm  # 👈 progress bar
import os

# Build a FAISS vector store from pre-chunked complaint text:
# load the chunk CSV, wrap each chunk in a LangChain Document carrying its
# complaint id and product as metadata, embed, index, and persist to disk.

# 1. Load your chunked data
chunk_df = pd.read_csv("../data/chunked_complaints_500_100.csv")
print("✅ Loaded chunked complaints:", chunk_df.shape)

# 2. Prepare the embedding model
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# 3. Convert to LangChain Documents with progress tracking
# pandas reads empty cells as NaN (a float), while Document.page_content is
# expected to be a string, so rows without text are skipped here.
# chunk_df itself is left untouched in case later cells rely on it.
valid_chunks = chunk_df.dropna(subset=["text_chunk"])
skipped_rows = len(chunk_df) - len(valid_chunks)
if skipped_rows:
    print(f"⚠️ Skipping {skipped_rows} rows with missing text_chunk.")

print("📦 Preparing documents with metadata...")
# Walk the three needed columns in lockstep instead of iterrows(), which
# builds a full Series per row; tolist() yields plain Python scalars.
chunk_fields = zip(
    valid_chunks["text_chunk"].tolist(),
    valid_chunks["complaint_id"].tolist(),
    valid_chunks["product"].tolist(),
)
documents = [
    Document(
        page_content=text,
        metadata={
            "complaint_id": complaint_id,
            "product": product,
        },
    )
    for text, complaint_id, product in tqdm(
        chunk_fields, total=len(valid_chunks), desc="Creating documents"
    )
]

print(f"✅ Prepared {len(documents)} documents.")

# Fail early with a clear message: an index cannot be built from zero vectors.
if not documents:
    raise ValueError(
        "No documents to index: the chunk file contains no usable "
        "'text_chunk' rows."
    )

# 4. Create FAISS index from documents
# NOTE: every document is embedded inside this single call and
# FAISS.from_documents does not report progress itself, so this step may
# appear to hang on large inputs.
print("⚙️ Embedding and indexing...")
vector_store = FAISS.from_documents(documents, embedding_model)

# 5. Save to disk
vector_store_dir = "../vector_store"
os.makedirs(vector_store_dir, exist_ok=True)
vector_store.save_local(vector_store_dir)

print(f"✅ FAISS vector store saved to: {vector_store_dir}/")
