In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# TODO(review): pin versions once a known-good combination is established.
%pip install -q langchain chromadb sentence-transformers google-generativeai gtts


In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -U langchain-community


In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -U langchain-google-genai


In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# Sentence-transformers model used to embed text for the vector stores below.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)


In [None]:
import sqlite3

# Open the SQLite database and list the tables it contains.
# `conn` and `cursor` stay open because the next cell reuses `cursor`.
conn = sqlite3.connect("/content/mahindra_university_data.db")
cursor = conn.cursor()

tables = cursor.execute(
    "SELECT name FROM sqlite_master WHERE type='table';"
).fetchall()
print("Tables in database:", tables)


In [None]:
# Print the column definitions of the two tables inspected in this notebook.
for table, label in (
    ("pages", "Pages table columns:"),
    ("page_sections", "Page sections table columns:"),
):
    cursor.execute(f"PRAGMA table_info({table});")
    print(label, cursor.fetchall())


In [None]:
import sqlite3
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document

from contextlib import closing

DB_PATH = "/content/mahindra_university_data.db"
persist_directory = "/content/chroma_db"

# Read every page from SQLite. closing() guarantees the connection is
# released even if the query raises (the original left it open).
with closing(sqlite3.connect(DB_PATH)) as conn:
    rows = conn.execute("SELECT url, title, content FROM pages").fetchall()

# Build LangChain Document objects. NULL columns become empty strings so the
# literal text "None" is never embedded.
documents = [
    Document(page_content=f"{title or ''}\n\n{content or ''}\n\nURL: {url or ''}")
    for url, title, content in rows
]

print(f"Loaded {len(documents)} documents from SQLite database.")
if not documents:
    # Fail with a clear message instead of an IndexError on documents[0].
    raise ValueError("No rows found in the 'pages' table; nothing to index.")
print("\nSample Document:\n", documents[0])

# Embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Deterministic ids make this cell safe to re-run: the same ids are written
# again instead of a second copy of every page being appended under fresh
# random ids (which would crowd the top-k results with duplicates).
document_ids = [f"page-{index}" for index in range(len(documents))]

# Create Chroma vector DB
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    ids=document_ids,
    persist_directory=persist_directory,
)
# NOTE(review): persist() is reported as deprecated in newer Chroma
# integrations (data is written automatically when persist_directory is set);
# confirm against the installed version before removing this call.
vectordb.persist()


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

import os
from getpass import getpass

# Never store the key in the notebook itself: read it from the environment,
# and fall back to an interactive prompt whose input is not echoed or saved.
google_api_key = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

# Gemini chat model used for retrieval-augmented answers and open queries.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.7,
    google_api_key=google_api_key,
)


In [None]:
from langchain.chains import RetrievalQA

# Retriever over the page index; returns 3 documents per query (k=3).
retriever = vectordb.as_retriever(search_kwargs=dict(k=3))

# Question-answering chain that combines the retriever with the Gemini LLM.
rag_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)


In [None]:
from gtts import gTTS
from IPython.display import Audio, display

def speak(text, filename="response.mp3"):
    """Convert ``text`` to speech with gTTS, save it, and play it inline.

    Args:
        text: Text to read aloud. ``None``, empty, or whitespace-only text is
            skipped, because gTTS refuses to synthesize empty input.
        filename: Path of the MP3 file to write.

    Returns:
        None. The audio player is rendered in the notebook output.
    """
    if not text or not str(text).strip():
        return  # nothing to say; avoids a gTTS failure on empty input

    tts = gTTS(text)
    tts.save(filename)
    display(Audio(filename, autoplay=True))  # ✅ autoplay is critical


In [None]:
# Question passed to the RetrievalQA chain in the next cell; edit it and
# re-run both cells to ask something else.
query = "Where is mahindra university?"  # ⬅️ Change this every time


In [None]:
# Chain.run() is deprecated; invoke() is the supported entry point.
# RetrievalQA reads the question from the "query" key and returns the answer
# text under "result", so `response` is still a plain string.
response = rag_chain.invoke({"query": query})["result"]
print("Bot:", response)
speak(response)  # Plays voice in notebook


In [None]:
from transformers import pipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document
from langchain.chains import RetrievalQA



In [None]:
# Embedding model shared by the FAQ and general vector stores below.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)


In [None]:
# Hand-written FAQ answers; each entry becomes one document in the FAQ index.
faq_texts = [
    "You can apply to Mahindra University via the official admissions portal. Admission is based on entrance exams like JEE Main, CLAT, CAT, or the Mahindra University Aptitude Test (MUAT).",
    "Mahindra University was established in 2014 and is located in Hyderabad, Telangana, India.",
    "The university offers B.Tech, MBA, Law, BBA, M.Tech, Ph.D., and programs in Digital Media, Hospitality, and Design Innovation.",
    "Hostel facilities are available for all full-time students, with modern amenities and recreational areas.",
    "Mahindra University has collaborations with École Centrale Paris, Cornell University, and Virginia Tech for research and student exchange.",
    "The university campus spans 130 acres and includes smart classrooms, advanced laboratories, sports facilities, and research centers.",
    "Top recruiters for placements include Amazon, Microsoft, Deloitte, and the Mahindra Group, with median salary packages ranging from ₹8-12 LPA.",
    "Mahindra University offers B.Tech programs in CSE, AI, ECE, Mechanical; MBA; and BA LLB degrees.",
]
faq_data = [Document(page_content=text) for text in faq_texts]

# Deterministic ids make this cell safe to re-run: entries are written under
# the same ids again instead of being appended a second time to the persisted
# collection under fresh random ids.
faq_ids = [f"faq-{index}" for index in range(len(faq_data))]

faq_vectordb = Chroma.from_documents(
    documents=faq_data,
    embedding=embedding_model,
    ids=faq_ids,
    persist_directory="/content/chroma_faq",
)
# NOTE(review): persist() is reported as deprecated in newer Chroma
# integrations; confirm against the installed version before removing it.
faq_vectordb.persist()

# Only the single best-matching FAQ entry is retrieved (k=1).
faq_retriever = faq_vectordb.as_retriever(search_kwargs={"k": 1})


In [None]:
# Open the Chroma store kept under /content/chroma_general (described by the
# author as the full knowledge base) and expose a k=3 retriever over it.
# NOTE(review): no cell in this notebook writes to /content/chroma_general --
# the page index above is persisted to /content/chroma_db. Confirm this
# directory is populated elsewhere; otherwise this store is presumably empty.
general_vectordb = Chroma(
    embedding_function=embedding_model,
    persist_directory="/content/chroma_general",
)
general_retriever = general_vectordb.as_retriever(search_kwargs=dict(k=3))


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint shared by the tokenizer and the seq2seq model so they stay in sync.
FLAN_CHECKPOINT = "google/flan-t5-base"

flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_CHECKPOINT)
flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_CHECKPOINT)


In [None]:
def generate_flan_response(intent, query, content=None):
    """Generate a reply with the local FLAN-T5 model.

    The prompt is produced by ``build_prompt(intent, query, content)``,
    tokenized with ``flan_tokenizer`` (truncation enabled), and passed to
    ``flan_model.generate``. The first returned sequence is decoded with
    special tokens removed and surrounding whitespace stripped.

    Args:
        intent: Intent label forwarded to ``build_prompt``.
        query: The user's message.
        content: Optional supporting text forwarded to ``build_prompt``.

    Returns:
        The decoded response string.
    """
    encoded = flan_tokenizer(
        build_prompt(intent, query, content),
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Decoding settings: sampling is enabled, so replies are presumably not
    # identical from one call to the next.
    generation_kwargs = dict(
        max_new_tokens=80,
        do_sample=True,
        temperature=0.7,
        top_k=5,
        top_p=0.95,
        num_beams=4,
    )
    generated = flan_model.generate(**encoded, **generation_kwargs)

    first_sequence = generated[0]
    return flan_tokenizer.decode(first_sequence, skip_special_tokens=True).strip()


In [None]:
def build_prompt(intent, query, content=None):
    """Return the prompt text for the given intent.

    Three templates exist:

    * ``intent == "greeting"``: asks for a warm welcome; ends with the user's
      message followed by a newline.
    * ``intent == "faq"`` with truthy ``content``: asks for a rephrased answer
      based on ``content``; ends with ``"Assistant:"``.
    * anything else (including ``"faq"`` without content): a generic chatbot
      prompt ending with ``"Assistant:"``; ``content`` is not used.

    Args:
        intent: Intent label selecting the template.
        query: The user's message, embedded verbatim in the prompt.
        content: Optional reference text, used only by the FAQ template.

    Returns:
        The assembled prompt string.
    """
    assistant_intro = "You are Mahindra University's official virtual assistant."

    if intent == "greeting":
        lines = [
            assistant_intro,
            "The user has greeted you. Reply with a warm, friendly message.",
            "Include a welcome, introduce yourself, and offer help related to Mahindra University — such as courses, admissions, or campus life.",
            "",
            f"User: {query}",
            "",  # keeps the trailing newline after the user's turn
        ]
    elif intent == "faq" and content:
        lines = [
            assistant_intro,
            "The user has asked a frequently asked question.",
            "Using the information below, answer the question in a warm, polite, and complete sentence as if you're assisting a student.",
            "Be clear, student-friendly, and avoid repeating the content directly — rephrase naturally if needed.",
            "",
            f"User's question: {query}",
            f"University info: {content}",
            "Assistant:",
        ]
    else:
        lines = [
            "You are Mahindra University's official chatbot.",
            "Answer the user's question clearly and accurately using university data.",
            "",
            f"User: {query}",
            "Assistant:",
        ]

    return "\n".join(lines)


In [None]:
def slm_chat(query):
    """Answer ``query`` according to its detected intent, then print and speak it.

    Routing by the label returned from ``classify_intent(query)``:

    * ``"faq"``: fetch the best FAQ entry from ``faq_retriever`` and let
      FLAN-T5 rephrase it (no entry found -> FLAN-T5 gets no context).
    * ``"greeting"``: FLAN-T5 generates the greeting.
    * ``"open_query"``: the generic prompt is sent to the Gemini ``llm``.
    * any other label: a fixed fallback message.

    NOTE(review): ``classify_intent`` is not defined in the visible part of
    this notebook; it must be defined before this function is called.

    Args:
        query: The user's message.

    Returns:
        None. The reply is printed and passed to ``speak``.
    """
    intent = classify_intent(query)
    print("Detected intent:", intent)

    if intent == "faq":
        # 🔗 Connect to Chroma FAQ DB
        # invoke() replaces the deprecated get_relevant_documents() and
        # matches the llm.invoke() call used for open queries below.
        results = faq_retriever.invoke(query)
        context = results[0].page_content if results else None
        response_text = generate_flan_response(intent, query, content=context)

    elif intent == "greeting":
        response_text = generate_flan_response(intent, query)

    elif intent == "open_query":
        prompt = build_prompt(intent, query)
        response = llm.invoke(prompt)
        response_text = response.content

    else:
        response_text = "I'm not sure how to handle that query yet."

    print("Bot:", response_text)
    speak(response_text)


In [None]:
# Demo: a greeting message (presumably routed to the "greeting" branch;
# the routing depends on classify_intent, which is not shown here).
slm_chat("Hi there!")



In [None]:

# Demo: question about the courses on offer (an FAQ entry covers this topic).
slm_chat("courses offered in  mahindra university")


In [None]:

# Demo: question about placement recruiters (an FAQ entry covers this topic).
slm_chat("Top recruiters for placements in  mahindra university")


In [None]:
# Demo: question about the campus location (an FAQ entry covers this topic).
slm_chat("where is mahindra university located")