In [7]:
import os
import re
import pandas as pd
from thefuzz import process  # Import fuzzy matching
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings

# ✅ Create and Store FAISS Index
# Builds the vector index from the faculty spreadsheet on first run and
# reuses the copy saved on disk on later runs.
FAISS_INDEX_PATH = "faiss_index_faculty_data"

# The index must be built and queried with the same embedding model, so the
# model is created once and shared by both branches below.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")

if not os.path.exists(FAISS_INDEX_PATH):
    print(f"⚠️ FAISS index not found at {FAISS_INDEX_PATH}, creating a new one...")
    df = pd.read_excel("ProcessedData/findig_mail.xlsx")
    df["Normalized Name"] = df["firstName"].str.lower().str.strip()
    df["Normalized Email"] = df["email"].str.lower().str.strip()
    df["Normalized Department"] = df["organizationUnit"].str.lower().str.strip()
    df["Normalized Designation"] = df["designation"].str.lower().str.strip()

    # Missing cells remain NaN (a float) after the .str operations and are not
    # text, so they are dropped before embedding. Values repeated across rows
    # (e.g. one department shared by many people) are indexed only once;
    # dict.fromkeys de-duplicates while keeping first-seen order.
    index_texts = []
    for column in (
        "Normalized Name",
        "Normalized Email",
        "Normalized Department",
        "Normalized Designation",
    ):
        index_texts.extend(df[column].dropna().tolist())
    index_texts = list(dict.fromkeys(index_texts))

    vectorstore = FAISS.from_texts(index_texts, embeddings)
    vectorstore.save_local(FAISS_INDEX_PATH)
    print("✅ FAISS index created and saved successfully.")
else:
    # SECURITY NOTE: allow_dangerous_deserialization=True lets the loader
    # unpickle the saved index. Only load an index directory that this script
    # wrote itself; never point FAISS_INDEX_PATH at untrusted files.
    vectorstore = FAISS.load_local(
        FAISS_INDEX_PATH,
        embeddings,
        allow_dangerous_deserialization=True
    )

retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})  # 🔹 FAISS optimized retrieval

# ✅ Load Faculty Data
# Read the faculty spreadsheet and, for each searchable column, add a
# lower-cased, whitespace-trimmed copy used for matching.
df = pd.read_excel("ProcessedData/findig_mail.xlsx")
for normalized_column, source_column in (
    ("Normalized Name", "firstName"),
    ("Normalized Email", "email"),
    ("Normalized Department", "organizationUnit"),
    ("Normalized Designation", "designation"),
):
    df[normalized_column] = df[source_column].str.lower().str.strip()

# ✅ Function to Format Output
def format_output(row):
    """Render one faculty record as a multi-line, markdown-style summary.

    Args:
        row: A mapping-like record exposing ``.get`` (for example a pandas
            Series or a dict) with the keys ``firstName``, ``email``,
            ``organizationUnit`` and ``designation``.

    Returns:
        The formatted text. A field that is absent, ``None`` or NaN is shown
        as a placeholder (``Unknown`` for the name, ``N/A`` for the others)
        instead of leaking ``nan``/``None`` into the output.
    """

    def _display(field, placeholder):
        # pd.notna is False for both None (missing key) and NaN (empty cell).
        value = row.get(field)
        return value if pd.notna(value) else placeholder

    return f"""
    👤 **Name**: {_display('firstName', 'Unknown')}
    📧 **Email**: {_display('email', 'N/A')}
    🏢 **Department**: {_display('organizationUnit', 'N/A')}
    🏷️ **Designation**: {_display('designation', 'N/A')}
    """

# ✅ Function to Search Faculty Data
def _rows_containing(df, text, columns):
    """Return rows of ``df`` where any of ``columns`` contains ``text`` literally (case-insensitive)."""
    mask = pd.Series(False, index=df.index)
    for column in columns:
        # regex=False: the text is data (e-mail addresses, "dept (ai)", "c++"),
        # not a pattern; as a regex it may fail to match itself or not compile.
        mask |= df[column].str.contains(text, case=False, na=False, regex=False)
    return df[mask]


def search_faculty(query, df):
    """Find the best-matching faculty record for a free-text query.

    The lookup is tried in three stages and the first hit wins:

    1. exact equality against any normalized column,
    2. nearest neighbours from the FAISS ``retriever``, mapped back to rows,
    3. fuzzy string matching (score above 75) per normalized column.

    Args:
        query: Text typed by the user (name, e-mail, department or
            designation). Case and surrounding whitespace are ignored.
        df: Faculty DataFrame containing the ``Normalized Name``,
            ``Normalized Email``, ``Normalized Department`` and
            ``Normalized Designation`` columns, plus whatever columns
            ``format_output`` reads.

    Returns:
        The formatted record of the first matching row, or a "no faculty
        found" message when nothing matches or the query is blank.
    """
    not_found_message = "⚠️ No faculty found matching your query!"
    search_columns = [
        "Normalized Name",
        "Normalized Email",
        "Normalized Department",
        "Normalized Designation",
    ]

    query = query.lower().strip()
    if not query:
        # A blank query has nothing to match on; without this guard it would
        # fall through to the vector search and return an arbitrary record.
        return not_found_message

    # ✅ Step 1: Exact Match - Search by Name, Email, Department, or Designation
    exact_match_df = df[df[search_columns].eq(query).any(axis=1)]
    if not exact_match_df.empty:
        return format_output(exact_match_df.iloc[0])

    # ✅ Step 2: If No Exact Match, Use FAISS for Retrieval
    # NOTE(review): the retriever appears to be configured without a score
    # threshold, so this step presumably yields a candidate for almost any
    # query and Step 3 is rarely reached - confirm whether that is intended.
    faiss_results = retriever.invoke(query)
    if faiss_results:
        for doc in faiss_results:
            indexed_text = doc.page_content.strip()
            if not indexed_text:
                # An empty string is contained in every value and would
                # "match" the first row of the table.
                continue
            candidate_rows = _rows_containing(df, indexed_text, search_columns)
            if not candidate_rows.empty:
                return format_output(candidate_rows.iloc[0])

    # ✅ Step 3: If No FAISS Results, Use Fuzzy Matching
    for column in search_columns:
        # NaN cells are not text and must not be offered as fuzzy candidates.
        choices = df[column].dropna().unique()
        match = process.extractOne(query, choices)
        if match is None:
            # No candidate at all (e.g. an empty column): try the next column.
            continue
        best_match, score = match
        if best_match and score > 75:
            fuzzy_match_df = df[df[column].str.contains(best_match, na=False, regex=False)]
            if not fuzzy_match_df.empty:
                return format_output(fuzzy_match_df.iloc[0])

    return not_found_message

# ✅ CLI Chatbot Loop
# Keeps prompting until the user types 'exit' (any case, surrounding spaces
# ignored) or ends the session with Ctrl-C / end-of-input.
print("🎓 Welcome to the Faculty Search Chatbot! Type 'exit' to stop.")
while True:
    try:
        user_query = input("\nSearch faculty by name, email, department, or designation: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C should end the session cleanly, not with a traceback.
        print("\n👋 Goodbye! Have a great day.")
        break
    if user_query.lower() == "exit":
        print("👋 Goodbye! Have a great day.")
        break
    if not user_query:
        # Nothing was typed: ask again instead of searching for an empty string.
        continue
    response = search_faculty(user_query, df)
    print(f"\n🔍 Search Results:\n{response}")

🎓 Welcome to the Faculty Search Chatbot! Type 'exit' to stop.



Search faculty by name, email, department, or designation:  electrical



🔍 Search Results:

    👤 **Name**: Jagvir
    📧 **Email**: jagvir.singh@dseu.ac.in
    🏢 **Department**: Department of Electrical Engineering
    🏷️ **Designation**: Associate Professor
    



Search faculty by name, email, department, or designation:  exit


👋 Goodbye! Have a great day.
