In [9]:
import pandas as pd
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.docstore.document import Document

# ✅ Step 1: Read Excel
# Load the combined project-panel workbook into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
excel_path = "../../data/processed/combined-project-panel.xlsx"
df = pd.read_excel(excel_path)

In [10]:
# Optional: clean & validate
# Columns that every row must provide as non-empty text.
text_columns = ["lecturer_name", "title", "source"]

# Strip BEFORE dropping rows: .str.strip() turns whitespace-only cells into ""
# and non-string cells into NaN. Doing the null check first would let both
# slip through as empty/NaN titles and metadata in the later steps.
cleaned_text = {
    column: df[column].str.strip().replace("", pd.NA) for column in text_columns
}

# Re-running this cell is safe: already-clean data passes through unchanged.
df = df.assign(**cleaned_text).dropna(subset=text_columns)

In [11]:
# ✅ Step 2: Prepare LangChain Documents
# Build one Document per row: the title becomes page_content, while the
# lecturer name and source are carried along as metadata.
# Zipping the three columns avoids iterrows(), which allocates a Series for
# every row just to read three values out of it.
docs = [
    Document(
        page_content=title,
        metadata={
            "lecturer_name": lecturer_name,
            "source": source,
        },
    )
    for title, lecturer_name, source in zip(
        df["title"], df["lecturer_name"], df["source"]
    )
]

In [14]:
# ✅ Step 3: Generate Embeddings & Save to FAISS
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
index_dir = "panel_db"

# Fail early with a clear message: FAISS.from_documents needs at least one
# embedding to build the index, and an empty list otherwise surfaces as an
# opaque indexing error from inside the library.
if not docs:
    raise ValueError("No documents to index: `docs` is empty.")

embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_name)
db = FAISS.from_documents(docs, embedding_model)
db.save_local(index_dir)  # This creates panel_db/index.faiss and panel_db/index.pkl

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
