# RAG Chatbot Project

### 1. Convert the PDFs to text

In [10]:
# PDF-to-text conversion: collect one record per page that yields text.
import pdfplumber

# Input PDFs, given as paths relative to the notebook's working directory.
pdf_files = [
    "content/admission_advertisement_25_batch.pdf",
    "content/ProspectusBatch_2514.05.2025.pdf"
]

# Each entry: {"text": <page text>, "source": <pdf path>, "page": <1-based page number>}
documents = []

for pdf_path in pdf_files:
    with pdfplumber.open(pdf_path) as pdf:
        # The generator is consumed inside the `with`, while the PDF is still open.
        documents.extend(
            {"text": page_text, "source": pdf_path, "page": page_number}
            for page_number, page_text in enumerate(
                (pdf_page.extract_text() for pdf_page in pdf.pages), start=1
            )
            if page_text  # skip pages for which no text was extracted
        )


### 2. Text Chunking

In [11]:
def chunk_text(text, chunk_size=500, overlap=50):
    """Split ``text`` into fixed-size character windows that overlap.

    Consecutive chunks share ``overlap`` characters, so each new chunk starts
    ``chunk_size - overlap`` characters after the previous one.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.
        overlap: Number of characters shared by consecutive chunks. Must be
            smaller than ``chunk_size``.

    Returns:
        A list of substrings of ``text`` in order. Empty input gives ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance, so
            the loop could not terminate).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    text_length = len(text)
    chunks = []
    start = 0

    while start < text_length:
        end = start + chunk_size
        chunks.append(text[start:end])
        # Once a chunk reaches the end of the text, any further chunk would be
        # a pure suffix of this one (a duplicate fragment), so stop here.
        if end >= text_length:
            break
        start += step

    return chunks


# Flatten every page record into chunk records; each chunk keeps the source
# path and page number of the page it was cut from.
chunks = [
    {
        "text": piece,
        "source": page_record["source"],
        "page": page_record["page"]
    }
    for page_record in documents
    for piece in chunk_text(page_record["text"])
]

### 3. Embeddings + FAISS

In [14]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

In [15]:
model = SentenceTransformer("all-MiniLM-L6-v2")

#### Create embeddings

In [16]:
# Only the chunk strings are embedded; list order matches `chunks`.
texts = [record["text"] for record in chunks]

# Encode in batches of 32 with a progress bar, then cast to float32
# (this array is what gets added to the FAISS index later).
embeddings = np.array(
    model.encode(texts, batch_size=32, show_progress_bar=True)
).astype("float32")

Batches: 100%|██████████| 20/20 [00:08<00:00,  2.30it/s]


#### Build FAISS index

In [17]:
# Vector width is taken from the second axis of the embedding matrix.
dim = embeddings.shape[1]
# Flat L2 index over `dim`-dimensional vectors.
index = faiss.IndexFlatL2(dim)
# Vectors are added in `embeddings` row order, i.e. the same order as `chunks`.
index.add(embeddings)

#### Keep metadata aligned with vectors

In [18]:
# `metadata` is an alias of `chunks` (not a copy): position i here describes the
# i-th vector added to `index`, because the embeddings were built from `chunks` in order.
metadata = chunks

### 4. Save the FAISS index and metadata

In [None]:
# Persist the FAISS index to "faiss.index" in the current working directory.
faiss.write_index(index, "faiss.index")

import pickle
# Persist the chunk records next to it; they are loaded back together with the
# index so that search positions can be mapped to text/source/page.
with open("metadata.pkl", "wb") as f:
    pickle.dump(metadata, f)

## Let's load the index and metadata

In [5]:
import faiss
import pickle
from sentence_transformers import SentenceTransformer
import numpy as np

# Reload the index written by the save cell.
index = faiss.read_index("faiss.index")

# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# load a "metadata.pkl" that this notebook produced, never an untrusted file.
with open("metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# Same model name as the one used to embed the chunks, so that query vectors
# are comparable with the indexed vectors.
model = SentenceTransformer("all-MiniLM-L6-v2")

## Retrieval function

In [6]:
def retrieve(query, k=5):
    """Return the metadata records of the chunks nearest to ``query``.

    The query is embedded with the module-level ``model`` and searched against
    the module-level FAISS ``index``; hit positions are mapped to records via
    the module-level ``metadata`` list.

    Args:
        query: The question or search string.
        k: Maximum number of neighbours to return.

    Returns:
        A list of at most ``k`` metadata dicts, nearest first. It is shorter
        than ``k`` when the index holds fewer than ``k`` vectors.
    """
    q_embedding = model.encode([query]).astype("float32")
    # Distances are not used; only the neighbour positions matter here.
    _, indices = index.search(q_embedding, k)

    # FAISS fills missing neighbours with the label -1. Indexing the list with
    # -1 would silently return the *last* chunk as a bogus hit, so skip them.
    return [metadata[idx] for idx in indices[0] if idx >= 0]

## Example test without LLM

In [7]:
# Retrieval-only sanity check: show where each hit came from and a short preview.
query = "What is the admission deadline?"
docs = retrieve(query)

for hit in docs:
    preview = hit["text"][:200]  # first 200 characters of the chunk
    print(f"{hit['source']} page {hit['page']}", preview, "----", sep="\n")


content/ProspectusBatch_2514.05.2025.pdf page 127
RULES AND PROCEDURE FOR ADMISSION
126
----
content/ProspectusBatch_2514.05.2025.pdf page 17
2.02.2026 28.07.2026
Pre-Admission Test (26-Batch) on 27.05.2026 Start of a new session (26-Batch) on 07.09.2026
Summer Vaca on
Including Make-up
semester
16
----
content/ProspectusBatch_2514.05.2025.pdf page 3
 supersede the old
ones.
Enquiries concerning admissions should be addressed to:
The Registrar or Chairman Admission Commi ee
Quaid-e-Awam University of Engineering, Science and Technology, Nawabshah,
----
content/ProspectusBatch_2514.05.2025.pdf page 17
ACADEMIC CALENDAR
SSSEEEMMMEEESSSTTTEEERRR SSSYYYSSSTTTEEEMMM (((OOOBBBEEE SSSYYYSSSTTTEEEMMM)))
1st Semester 2nd Semester Winter Summer Vaca on
Batch & Semester First Year First Year with
Vaca on
(25
----
content/admission_advertisement_25_batch.pdf page 3
PRE-ADMISSION TEST
01. All eligible candidates shall be required to appear in Pre-Admission Test date
the computer-based Pre-Admission

# APP

### Google Model

In [None]:
import os
from getpass import getpass

from langchain_google_genai import ChatGoogleGenerativeAI

# Do not hardcode the key in the notebook, and do not overwrite a key that is
# already exported in the environment: prompt (without echo) only when missing.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("GOOGLE_API_KEY: ")

# temperature=0 is kept from the original configuration.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0
)

### Llama Model using Groq

In [None]:
import os
from getpass import getpass

from langchain_groq import ChatGroq

# Do not hardcode the key in the notebook, and do not overwrite a key that is
# already exported in the environment: prompt (without echo) only when missing.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("GROQ_API_KEY: ")

# temperature=0 is kept from the original configuration.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0
)

### Deepseek-r1 using OpenRouter

In [None]:
import os
from getpass import getpass

from langchain_openai import ChatOpenAI

# Do not hardcode the key in the notebook, and do not overwrite a key that is
# already exported in the environment: prompt (without echo) only when missing.
if not os.environ.get("OPENROUTER_API_KEY"):
    os.environ["OPENROUTER_API_KEY"] = getpass("OPENROUTER_API_KEY: ")

# The OpenAI-compatible client is pointed at the OpenRouter endpoint via
# base_url, and the key is passed explicitly from the environment.
llm = ChatOpenAI(
    model="deepseek/deepseek-r1",
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
    temperature=0
)

In [32]:
def rag_answer(query, k=5):
    """Answer ``query`` with the module-level ``llm`` using retrieved chunks.

    Retrieves up to ``k`` chunks via ``retrieve``, formats each one with its
    source and page, embeds them in a fixed prompt, and returns the
    ``.content`` of the LLM response.

    Args:
        query: The user question.
        k: Number of chunks requested from ``retrieve``.

    Returns:
        The ``content`` attribute of the object returned by ``llm.invoke``.
    """
    retrieved = retrieve(query, k)

    # One "Source/Page" header plus the chunk text per retrieved record.
    sections = []
    for item in retrieved:
        sections.append(f"Source: {item['source']} Page: {item['page']}\n{item['text']}")
    context = "\n\n".join(sections)

    prompt = f"""
Use only the context below.
Answer the question based on the context.
QUEST - Quaid-e-Awam University - Nawabshah
If answer is not present, say: Not found in documents But I will try to find the answer.


Context:
{context}

Question:
{query}
"""

    response = llm.invoke(prompt)
    return response.content

In [33]:
# End-to-end demo: retrieval + LLM answer; uses whichever `llm` cell was run last.
print(rag_answer("What are the eligibility criteria for undergraduate admissions?"))

Based solely on the provided context, the eligibility criteria for undergraduate admissions at Quaid-e-Awam University are:

1.  **Educational Qualification:** Candidates must have passed or be appearing in the HSC Part-II (or equivalent) examination:
    *   In the year 2025, or
    *   Before 2025, but not earlier than the Annual Examination 2020.
    *   Candidates who passed SSC-II (Matric) Annual examination before 2018 are **not eligible**.
    *   Candidates can apply using HSC-I marks, but **must pass HSC-II** with the required minimum marks.

2.  **Minimum Marks:**
    *   **Engineering Programs (B.E.):** At least 60% marks in HSC-II (excluding Hifz-ul-Quran marks and any grace marks given by the board for grade improvement).
    *   **Science and Technology Programs (BS - CET & FET):** At least 50% marks in HSC-II.

3.  **Pre-Admission Test:** Candidates applying for B.E./BS Programs **are required to appear** in a computerized MCQ-type Pre-Admission Test and **must obtain a 

In [36]:
# Second end-to-end demo query, answered with the default k=5 retrieved chunks.
print(rag_answer("Faculty of Engineering and Technology offers which degree programs?"))

Based solely on the provided context, the Faculty of Engineering and Technology at QUEST offers the following degree programs:

1.  **Bachelor's Programs:**
    *   BS Engineering Technology (Civil Technology, Mechanical Technology, Electrical Technology) - *Faculty of Technology*
    *   Bachelor of Engineering (BE) in Software Engineering - *4 years (8 semesters)*
    *   Bachelor of Electronic Engineering
    *   Computer Systems Engineering (Bachelor's)
    *   Computer Communication & Networks (Bachelor's)
    *   Civil Engineering
    *   Building & Architectural Engineering
    *   Mechanical Engineering
    *   Industrial & Manufacturing Engineering
    *   Electrical Engineering
    *   Electrical Engineering (Automation & Control)

2.  **Master's Programs:**
    *   Master of Engineering in Electronic Engineering (Communications Engineering, Industrial Automation & Control)
    *   Master of Engineering in Civil Engineering
    *   Master of Engineering in Construction Engine