In [3]:
import re
import os
import pickle
import pandas as pd
import faiss
import numpy as np
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer

# ✅ Load Data
# Spreadsheet of campus survey responses; read into a DataFrame once at start-up.
data_path = "ProcessedData/Campuses Data (Responses)(1).xlsx"
df1 = pd.read_excel(data_path)

# ✅ Normalize campus names for better search
# Drop the standalone word "DSEU" (any letter case), then trim outer whitespace.
df1["Normalized Campus Name"] = (
    df1["Name of the Campus"]
    .str.replace(r"\bDSEU\b", "", regex=True, case=False)
    .str.strip()
)

# ✅ Exact header text of the labs column, kept in a variable because it is so long
labs_col = "Labs In the Campus(Provide Labs' description with Labs' name including the departments they fall in)"

# ✅ Sentence-embedding model used for both the indexed text and the queries
embedding_model = SentenceTransformer("sentence-transformers/multi-qa-MiniLM-L6-cos-v1")

# ✅ On-disk cache locations: pickled DataFrame plus one FAISS index per search field
pickle_path = "campus_data.pkl"
index_paths = dict(
    campus="faiss_campus.index",
    course="faiss_course.index",
    location="faiss_location.index",
)

# ✅ Function to create FAISS index
def create_faiss_index(vectors):
    """Build a flat (exact, brute-force) L2 FAISS index over row vectors.

    Args:
        vectors: 2-D array-like of shape (n_vectors, dimension). It is
            converted to a C-contiguous float32 array, which is the layout
            the FAISS Python bindings require.

    Returns:
        A ``faiss.IndexFlatL2`` containing every row of ``vectors``; row ``i``
        of the input gets FAISS id ``i``.

    Raises:
        ValueError: if ``vectors`` is not 2-dimensional.
    """
    # Coerce up front so float64 arrays or plain nested lists do not fail
    # inside FAISS with an opaque type error.
    matrix = np.ascontiguousarray(vectors, dtype=np.float32)
    if matrix.ndim != 2:
        raise ValueError(
            f"vectors must be 2-D (n_vectors, dimension); got {matrix.ndim}-D input"
        )

    index = faiss.IndexFlatL2(matrix.shape[1])
    index.add(matrix)
    return index

# ✅ Check if FAISS indexes already exist
# The cache is used only when all three index files AND the pickled DataFrame
# exist; otherwise everything is rebuilt from the freshly loaded spreadsheet.
# NOTE: only file existence is checked, so the cache is never invalidated when
# the spreadsheet changes -- delete the cache files to force a rebuild.
if all(os.path.exists(path) for path in index_paths.values()) and os.path.exists(pickle_path):
    print("🔄 Loading FAISS indexes and data from storage...")

    # Load FAISS indexes
    faiss_campus = faiss.read_index(index_paths["campus"])
    faiss_course = faiss.read_index(index_paths["course"])
    faiss_location = faiss.read_index(index_paths["location"])

    # Load DataFrame with stored vectors (replaces the DataFrame read from Excel
    # so that row positions match the ids stored in the indexes).
    # NOTE(security): pickle.load can execute arbitrary code; only keep this if
    # the cache file is written exclusively by this script on a trusted machine.
    with open(pickle_path, "rb") as f:
        df1 = pickle.load(f)
else:
    print("📌 Generating new FAISS indexes and storing data...")

    # Text to embed for each search field. Missing cells become "" in all three
    # columns (the campus-name column previously had no fillna and would hand
    # NaN to the encoder).
    campus_texts = df1["Normalized Campus Name"].fillna("").astype(str).tolist()
    course_texts = df1["Courses Offered by the Campus"].fillna("").astype(str).tolist()
    location_texts = df1["Location of the campus"].fillna("").astype(str).tolist()

    # Compute vector embeddings with one batched encode() call per column
    # instead of one model call per row.
    campus_vectors = np.asarray(embedding_model.encode(campus_texts), dtype=np.float32)
    course_vectors = np.asarray(embedding_model.encode(course_texts), dtype=np.float32)
    location_vectors = np.asarray(embedding_model.encode(location_texts), dtype=np.float32)

    # Keep the per-row vectors alongside the data (one 1-D array per cell)
    df1["Campus Vector"] = list(campus_vectors)
    df1["Course Vector"] = list(course_vectors)
    df1["Location Vector"] = list(location_vectors)

    # Create FAISS indexes (row i of each matrix gets FAISS id i)
    faiss_campus = create_faiss_index(campus_vectors)
    faiss_course = create_faiss_index(course_vectors)
    faiss_location = create_faiss_index(location_vectors)

    # Save FAISS indexes to files
    faiss.write_index(faiss_campus, index_paths["campus"])
    faiss.write_index(faiss_course, index_paths["course"])
    faiss.write_index(faiss_location, index_paths["location"])

    # Save DataFrame with vectors to a pickle file
    with open(pickle_path, "wb") as f:
        pickle.dump(df1, f)

# ✅ Function to format output
def format_output(row):
    """Render one campus record as a multi-line, markdown-flavoured string.

    Args:
        row: mapping-like record exposing ``.get(key, default)`` (a pandas
            Series row or a plain dict) keyed by the spreadsheet column names.

    Returns:
        The formatted text block. A field whose key is absent OR whose value
        is missing (None / NaN / NaT / pd.NA) is shown as its fallback text
        instead of leaking "nan" or "None" into the output.
    """
    def field(key, fallback='N/A'):
        value = row.get(key, fallback)
        # .get() only applies its default when the key is absent; an empty
        # spreadsheet cell is present but NaN, so check for that explicitly.
        # is_scalar guards pd.isna against array-valued cells.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return fallback
        return value

    return f"""
    🎓 **{field('Name of the Campus', 'Unknown Campus')}**
    📍 **Campus Name**: {field('Name of the Campus')}
    📧 **Email**: {field('Email Address')}
    📌 **Location**: {field('Location of the campus')}
    📚 **Courses Offered**: {field('Courses Offered by the Campus')}
    🏛️ **Labs & Descriptions**: {field(labs_col)}
    📸 **Campus Photos**: {field('Upload the Photos of the Campus')}
    ℹ️ **Additional Data**: {field('Any other Data')}
    """

# ✅ Function to search using FAISS
def search_campus(query, faiss_index, vector_column, df, top_k=1):
    """Return formatted records for the rows nearest to ``query``.

    Args:
        query: free-text search string.
        faiss_index: FAISS index whose ids are row positions in ``df``.
        vector_column: name of the matching vector column in ``df``; not used
            by the lookup itself, kept so existing callers keep working.
        df: DataFrame the index was built from (rows addressed by position).
        top_k: number of nearest neighbours to request.

    Returns:
        A list with one formatted string per hit, or a single-element list
        holding the "no matching results" message when there is no usable hit.
    """
    no_match = ["⚠️ No matching results found!"]

    # A blank query would still be embedded and matched to some arbitrary
    # nearest row; report "no results" instead of a misleading hit.
    if query is None or not str(query).strip():
        return no_match

    # FAISS requires a float32 matrix of shape (n_queries, dimension).
    query_vector = np.asarray(embedding_model.encode(query), dtype=np.float32).reshape(1, -1)
    _, neighbours = faiss_index.search(query_vector, k=top_k)  # distances unused

    # FAISS pads with -1 when fewer than top_k vectors exist; the upper bound
    # guards against an index that is out of sync with ``df``.
    row_count = len(df)
    results = [
        format_output(df.iloc[idx])
        for idx in neighbours[0]
        if 0 <= idx < row_count
    ]

    return results if results else no_match

# ✅ Enhanced Function to Handle Multi-Query Search
def search_university(query):
    """Route a free-text query to the course, location, or campus-name index.

    The query is lower-cased and trimmed first. If it contains "course "
    followed by more text, that trailing text is searched in the course
    index; otherwise the same rule is tried with "location " against the
    location index. Anything else is searched, as a whole, in the
    campus-name index.

    Returns:
        Whatever ``search_campus`` returns for the chosen index.
    """
    normalized = query.lower().strip()

    # 🔍 Course search: the pattern can only match when "course" is present,
    # so the regex alone decides whether this route applies.
    course_hit = re.search(r"course (.+)", normalized)
    if course_hit is not None:
        course_text = course_hit.group(1).strip()
        return search_campus(course_text, faiss_course, "Course Vector", df1)

    # 🔍 Location search: same idea, tried only after the course route failed.
    location_hit = re.search(r"location (.+)", normalized)
    if location_hit is not None:
        location_text = location_hit.group(1).strip()
        return search_campus(location_text, faiss_location, "Location Vector", df1)

    # 🔍 Fallback: treat the whole query as a campus name.
    return search_campus(normalized, faiss_campus, "Campus Vector", df1)

# ✅ Initialize LangChain Memory for Chat History
memory = ConversationBufferMemory()

# ✅ Main Chatbot Loop
# Reads queries until the user types "exit" (case-insensitive, surrounding
# whitespace ignored) or closes/interrupts the input stream.
print("🏫 Welcome to the University Campus Chatbot! Type 'exit' to stop.")

while True:
    try:
        user_query = input("\nSearch by course name, location, or campus: ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C or a closed stdin: leave cleanly instead of a traceback.
        print("\n👋 Goodbye! Have a great day.")
        break

    cleaned_query = user_query.strip()

    if cleaned_query.lower() == "exit":
        print("👋 Goodbye! Have a great day.")
        break

    # Nothing to search for; prompt again rather than matching an empty string.
    if not cleaned_query:
        continue

    # Store conversation in memory
    memory.chat_memory.add_user_message(user_query)

    # Get search results (a list; one entry per match, or a single no-match message)
    responses = search_university(user_query)

    # Store bot response in memory
    for response in responses:
        memory.chat_memory.add_ai_message(response)
        print(f"\n🔍 Search Results:\n{response}")


🔄 Loading FAISS indexes and data from storage...
🏫 Welcome to the University Campus Chatbot! Type 'exit' to stop.



Search by course name, location, or campus:  rajkori



🔍 Search Results:

    🎓 **DSEU Rajokri Campus**
    📍 **Campus Name**: DSEU Rajokri Campus
    📧 **Email**: sunita.k.chaurasia@dseu.ac.in
    📌 **Location**: Near Shiv Mandir Rajokri
    📚 **Courses Offered**: Diploma in Computer Engineering, Diploma in Artificial Intelligence & Machine Learning, Bachelor in Computer Application, B.Sc in Data Analytics 
    🏛️ **Labs & Descriptions**: 6 Computer Labs, 1 Hardware/Electronics Lab, 1 Electrical Workshop
    📸 **Campus Photos**: https://drive.google.com/open?id=1_qa0ByLO4rw_803RlEPV6-4LpaK27Qgh
    ℹ️ **Additional Data**: N/A
    



Search by course name, location, or campus:  exit


👋 Goodbye! Have a great day.
