In [None]:
# CELL 1
# Mount Google Drive into the Colab runtime at /content/drive so the policy
# PDFs stored there become readable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

# Create the local data/policies directory (no error if it already exists)
# and copy every PDF from the Drive folder into it. app.py loads its PDFs
# from data/policies, so this cell has to run before the app is started.
!mkdir -p data/policies
!cp /content/drive/MyDrive/CU_Policies/*.pdf data/policies/

Mounted at /content/drive


In [None]:
# CELL 2: Fixed Installation (Latest Compatible Versions)
!pip install -q \
    langchain \
    langchain-community \
    langchain-huggingface \
    faiss-cpu \
    pypdf \
    streamlit \
    pyngrok \
    python-dotenv \
    langchain-groq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m44.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m80.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.9/323.9 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m139.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m67.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does 

In [None]:
# CELL 3: Updated app.py (Compatible with Latest LangChain)
%%writefile app.py
import os
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from dotenv import load_dotenv

# -------------------------------------------------
# Page config
# -------------------------------------------------
# Page-level settings first, then the static title and caption shown above
# the question box.
st.set_page_config(page_title="CU Policy Chatbot", layout="centered")
st.title("Chandigarh University Student Help Chatbot")
# NOTE(review): the caption names "Llama-3-8B" while get_llm() configures
# "llama-3.1-8b-instant" — confirm which wording is intended.
st.caption("Ask anything about CU policies – powered by Llama-3-8B via Groq")

# -------------------------------------------------
# Load environment variables (GROQ_API_KEY)
# -------------------------------------------------

# The key is read from the process environment, optionally populated from a
# local `.env` file, so that it never lives in the source file.
# SECURITY: any key that was ever written literally into this file must be
# treated as leaked and revoked in the Groq console.
load_dotenv()  # does nothing when no .env file is present
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
if not GROQ_API_KEY:
    st.error("❌ GROQ_API_KEY not found. Add it in **Secrets** (left pane) or create a `.env` file.")
    st.stop()

# -------------------------------------------------
# 1. Build FAISS index – cached once
# -------------------------------------------------
@st.cache_resource(show_spinner="Loading PDFs & building index…")
def build_vectorstore():
    """Load the policy PDFs, split them into chunks and index them in FAISS.

    The function is wrapped in ``st.cache_resource``, so the result is reused
    across Streamlit reruns instead of being rebuilt on every interaction.

    Returns:
        A ``(db, embedder)`` tuple: the FAISS vector store built from the
        chunks and the HuggingFace embedding object used to build it.

    When the folder is missing or yields no documents, an error is shown in
    the page and ``st.stop()`` is called instead of returning.
    """
    folder = "data/policies"
    # isdir (not exists): a regular file at this path would otherwise reach
    # os.listdir below and raise NotADirectoryError.
    if not os.path.isdir(folder):
        st.error("❌ PDF folder not found! Run the Drive mount cell first.")
        st.stop()

    # Sorted so the document order does not depend on the arbitrary order in
    # which os.listdir returns directory entries.
    pdf_names = sorted(
        name for name in os.listdir(folder) if name.lower().endswith(".pdf")
    )

    docs = []
    for name in pdf_names:
        loader = PyPDFLoader(os.path.join(folder, name))
        docs.extend(loader.load())

    if not docs:
        st.error("❌ No PDFs found in data/policies!")
        st.stop()

    # 1000-character chunks with 200 characters of overlap between neighbours.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)

    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.from_documents(chunks, embedder)
    return db, embedder

db, embedder = build_vectorstore()
# Count only *.pdf entries: a plain len(os.listdir(...)) would also count any
# other file or sub-directory sitting in the folder and overstate the number.
pdf_count = sum(
    1 for name in os.listdir("data/policies") if name.lower().endswith(".pdf")
)
st.success(f"✅ Indexed {len(db.index_to_docstore_id)} chunks from {pdf_count} PDFs!")

# -------------------------------------------------
# 2. LLM – lazy, API-based
# -------------------------------------------------
@st.cache_resource
def get_llm():
    """Create the Groq chat model client used to answer questions.

    Decorated with ``st.cache_resource`` so the client object is reused
    rather than re-created each time the script runs.
    """
    groq_settings = {
        "model": "llama-3.1-8b-instant",
        "temperature": 0.2,
        "groq_api_key": GROQ_API_KEY,
    }
    return ChatGroq(**groq_settings)

llm = get_llm()

# -------------------------------------------------
# 3. UI – ask a question
# -------------------------------------------------
query = st.text_input("Your question about CU policies", placeholder="e.g. What is the late-fee policy?")

# The button is evaluated unconditionally so it is always rendered, and its
# state is kept separate from "is there a question" so each combination can
# get its own feedback below.
ask_clicked = st.button("Ask")
has_question = bool(query.strip())

if ask_clicked and has_question:
    # Retrieve the 5 chunks most similar to the question and join them into
    # a single context string for the prompt.
    with st.spinner("Searching policies…"):
        docs = db.similarity_search(query, k=5)
        context = "\n\n".join(d.page_content for d in docs)

    prompt = f"""Use **only** the following context to answer the question factually.
If the answer isn't in the context, say: "I couldn't find that information in the policies."

Context:
{context}

Question: {query}
Answer:"""

    with st.spinner("Generating answer with Llama-3…"):
        try:
            answer = llm.invoke(prompt).content.strip()
        except Exception as e:
            st.error(f"❌ LLM error: {e}. Check your Groq API key.")
        else:
            # Rendering happens outside the try body so a display problem is
            # not misreported as an LLM / API-key failure.
            st.success("✅ Answer")
            st.markdown(answer)
elif ask_clicked:
    # Previously a click with an empty box did nothing at all.
    st.warning("Please type a question before clicking 'Ask'.")
elif has_question:
    st.info("Click 'Ask' to get your answer!")

Writing app.py


In [None]:
# CELL 5
# Launch app.py with Streamlit in the background and expose it through an
# ngrok tunnel.
import getpass
import os
import socket
import subprocess
import time

from pyngrok import ngrok

STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 60           # upper bound on the wait for Streamlit to boot
STREAMLIT_LOG = "streamlit.log"  # Streamlit stdout/stderr, kept for troubleshooting

# ---- ngrok credentials ------------------------------------------------------
# SECURITY: the token is never written into the notebook. It is taken from the
# NGROK_AUTHTOKEN environment variable, or prompted for (input is not echoed).
# Any token that was previously stored literally in this cell should be
# revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok authtoken: ")
if not ngrok_token.strip():
    raise RuntimeError("An ngrok authtoken is required to open the tunnel.")

# ---- Kill any old processes -------------------------------------------------
# pkill is run directly rather than through a shell: with `!pkill -f <name>`
# the shell's own command line contains <name>, so it can be matched too
# (presumably the source of stray "^C" output — verify on your runtime).
# Exit status 1 simply means nothing matched, hence check=False.
for pattern in ("streamlit", "ngrok"):
    subprocess.run(["pkill", "-f", pattern], stderr=subprocess.DEVNULL, check=False)

# ---- Start Streamlit (background) -------------------------------------------
# Output goes to a log file instead of being discarded, so a failed start
# (bad import, missing key, ...) can be diagnosed.
with open(STREAMLIT_LOG, "w") as log_file:
    proc = subprocess.Popen([
        "streamlit", "run", "app.py",
        "--server.port", str(STREAMLIT_PORT),
        "--server.address", "0.0.0.0",
        "--server.headless", "true"
    ], stdout=log_file, stderr=subprocess.STDOUT)

# Wait until the port accepts connections rather than sleeping a fixed time;
# bail out early if the process has died or the timeout is exceeded.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if proc.poll() is not None:
        raise RuntimeError(
            f"Streamlit exited with code {proc.returncode}; see {STREAMLIT_LOG}"
        )
    try:
        with socket.create_connection(("127.0.0.1", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() >= deadline:
            proc.terminate()
            raise RuntimeError(
                f"Streamlit did not start within {STARTUP_TIMEOUT_S}s; see {STREAMLIT_LOG}"
            ) from None
        time.sleep(0.5)

# ---- ngrok tunnel -----------------------------------------------------------
ngrok.kill()    # clean old tunnels
ngrok.set_auth_token(ngrok_token)
public_url = ngrok.connect(STREAMLIT_PORT, bind_tls=True)
# public_url is a tunnel object; print only its URL attribute.
print(f"\nYour chatbot is LIVE at:\n   {public_url.public_url}\n")

^C
^C

Your chatbot is LIVE at:
   NgrokTunnel: "https://aciculate-alice-nontumultuous.ngrok-free.dev" -> "http://localhost:8501"

