In [2]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
import pandas as pd

# Load the two source tables into DataFrames.
# The paths use forward slashes: the previous backslash form depended on "\c"
# being an unrecognized escape sequence (which Python warns about) and only
# resolved as a directory separator on Windows.
customer_data = pd.read_csv('Data/customer_data.csv')
crm_data = pd.read_csv('Data/crm_people_mock_data.csv')

# Embedding backend shared by every index in this notebook: the checkpoint
# named by `model_name`, run on the CPU, with normalized embeddings requested.
model_name = "BAAI/bge-m3"
embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs={"normalize_embeddings": True},
    model_kwargs={"device": "cpu"},
    model_name=model_name,
)

def create_documents(df, source_name):
    """Convert every row of *df* into a ``Document``.

    Each row is rendered as space-separated ``column: value`` pairs, leaving
    out any cell for which ``pd.notna`` is false. The document metadata holds
    *source_name* under ``"source"`` and the row's index label (as yielded by
    ``df.iterrows()``) under ``"row_id"``.

    Returns the documents as a list, in row order.
    """

    def render(record):
        # `!s` applies str() to the cell, exactly like an explicit str(...) call.
        pairs = (
            f"{name}: {cell!s}"
            for name, cell in record.items()
            if pd.notna(cell)
        )
        return " ".join(pairs)

    return [
        Document(
            page_content=render(record),
            metadata={"source": source_name, "row_id": label},
        )
        for label, record in df.iterrows()
    ]

# Step 1: turn each table's rows into documents tagged with their source.
customer_docs = create_documents(customer_data, "customer_data")
crm_docs = create_documents(crm_data, "crm_data")

# Step 2: embed the documents and build one FAISS index per table.
customer_db = FAISS.from_documents(customer_docs, embeddings)
crm_db = FAISS.from_documents(crm_docs, embeddings)

# Step 3: write both indexes to disk so they can be reloaded later.
customer_db.save_local("customer_index")
crm_db.save_local("crm_index")

  embeddings = HuggingFaceBgeEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Reopen the two indexes saved to disk, using the same embedding object.
# NOTE(security): allow_dangerous_deserialization=True opts in to pickle-based
# loading; only do this for index folders you created yourself or fully trust.
loaded_customer_db = FAISS.load_local(
    folder_path="customer_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)
loaded_crm_db = FAISS.load_local(
    folder_path="crm_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Ask the customer index for the two entries closest to a free-text query.
query = "Find customers interested in apples"
results = loaded_customer_db.similarity_search(query=query, k=2)