In [1]:
print(10)

10


In [2]:
from pypdf import PdfReader
import os

print(os.getenv("GEMINI_API_KEY"))


AIzaSyAHxV-uXIZ6mxe-RQFymVO73ju3f7v7-g4


In [3]:
DATA_DIR = "data"

def load_pdf_text(file_path):
    reader = PdfReader(file_path)
    text = ""
    for page in reader.pages:
        extracted = page.extract_text()
        if extracted:
            text += extracted + "\n"
    return text

documents = []

for file in os.listdir(DATA_DIR):
    if file.endswith(".pdf"):
        text = load_pdf_text(os.path.join(DATA_DIR, file))
        documents.append({
            "source": file,
            "text": text
        })

print("Total PDFs loaded:", len(documents))
for doc in documents:
    print(doc["source"], "-> characters:", len(doc["text"]))


Total PDFs loaded: 10
Academic_Regulations.pdf -> characters: 1573
BTech_Academic_Regulations.pdf -> characters: 1131
BTech_Branch_Wise_Syllabus.pdf -> characters: 692
BTech_Placement_Policy.pdf -> characters: 433
BTech_Project_and_Internship_Guidelines.pdf -> characters: 404
BTech_Student_Handbook.pdf -> characters: 365
Examination_Guidelines.pdf -> characters: 1058
Placement_Policy.pdf -> characters: 1038
syllabus.pdf -> characters: 452032
TCS NQT IMP CONCEPTS.pdf -> characters: 601


In [4]:
def chunk_text(text, chunk_size=500, overlap=100):
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap
    return chunks

chunks = []

for doc in documents:
    small_chunks = chunk_text(doc["text"])
    for ch in small_chunks:
        chunks.append({
            "source": doc["source"],
            "text": ch
        })

print("Total chunks created:", len(chunks))
print("\nSample chunk:\n")
print(chunks[0]["text"])


Total chunks created: 1153

Sample chunk:

Academic Regulations â€“ Undergraduate Programs 
1. Introduction 
These academic regulations govern all undergraduate students enrolled in the institution. The 
objective is to maintain academic discipline, ensure fair evaluation, and promote holistic learning. 
 
2. Academic Year & Semester System 
â€¢ The academic year is divided into two semesters: 
o Odd Semester (Julyâ€“November) 
o Even Semester (Januaryâ€“May) 
â€¢ Each semester consists of: 
o Minimum 90 instructional days 
o Internal assessments 


In [5]:
from sentence_transformers import SentenceTransformer
import numpy as np


In [6]:
# Load embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Take only text from chunks
texts = [chunk["text"] for chunk in chunks]

# Convert text to vectors
embeddings = embedder.encode(texts)

print("Embeddings created for chunks:", len(embeddings))
print("Vector size:", embeddings.shape[1])


Embeddings created for chunks: 1153
Vector size: 384


In [7]:
import faiss

# Create FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)

# Add embeddings to FAISS
index.add(np.array(embeddings))

print("Total chunks stored in FAISS:", index.ntotal)


Total chunks stored in FAISS: 1153


In [8]:
# Save FAISS index and chunks for Streamlit
faiss.write_index(index, "faiss.index")
np.save("chunks.npy", chunks)

print("FAISS index and chunks saved successfully")


FAISS index and chunks saved successfully


In [9]:
# Student question
query = "What is the minimum attendance required?"

# Convert question to vector
query_embedding = embedder.encode([query])

# Search FAISS for top 2 relevant chunks
D, I = index.search(np.array(query_embedding), k=2)

print("Retrieved chunks:\n")

for idx in I[0]:
    print("Source:", chunks[idx]["source"])
    print(chunks[idx]["text"])
    print("-" * 50)


Retrieved chunks:

Source: BTech_Academic_Regulations.pdf
 of an Odd Semester (Julyâ€“November) and an Even Semester (Januaryâ€“May).
3. Credit System
The program follows a credit-based system. One credit corresponds to one hour of theory per week or two
hours of lab per week.
Students must earn all prescribed credits to be eligible for graduation.
4. Attendance Requirements
A minimum of 75% attendance is mandatory in each subject.
Students with attendance between 65% and 74% may be condoned upon payment of a fee.
Students with attendance below 65% are not
--------------------------------------------------
Source: Academic_Regulations.pdf
nts 
â€¢ Minimum 75% attendance is mandatory in each subject. 
â€¢ Students with attendance between 65%â€“74% may be condoned after paying a fee. 
â€¢ Students with attendance below 65% are not eligible to appear for examinations. 
 
5. Promotion Rules 
â€¢ Students are promoted semester-wise. 
â€¢ A student may carry a maximum of 2 backlogs to the ne

In [10]:
import os
from google import genai

In [11]:
# Create Gemini client using API key from environment variable
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

# Combine retrieved chunks into context
context = "\n\n".join([chunks[idx]["text"] for idx in I[0]])

prompt = f"""
You are a college assistant chatbot.

Answer the question ONLY using the context below.
If the answer is not present, say "Information not found in college documents."

Context:
{context}

Question:
{query}

Answer:
"""


In [12]:
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=prompt
)

print(response.text)


A minimum of 75% attendance is mandatory in each subject.


In [13]:
print("ðŸŽ“ College Assistant Chatbot")
print("Type 'exit' to stop\n")

while True:
    query = input("You: ")
    
    if query.lower() == "exit":
        print("Chatbot: Goodbye ðŸ‘‹")
        break

    # Convert query to vector
    query_embedding = embedder.encode([query])

    # Retrieve top 2 chunks
    D, I = index.search(np.array(query_embedding), k=2)

    # Build context
    context = "\n\n".join([chunks[idx]["text"] for idx in I[0]])

    prompt = f"""
    You are a college assistant chatbot.

    Answer the question ONLY using the context below.
    If the answer is not present, say "Information not found in college documents."

    Context:
    {context}

    Question:
    {query}

    Answer:
    """

    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt
    )

    print("\nChatbot:", response.text)
    print("-" * 60)


ðŸŽ“ College Assistant Chatbot
Type 'exit' to stop



You:  ipl 2024 winner



Chatbot: Information not found in college documents.
------------------------------------------------------------


You:  exit


Chatbot: Goodbye ðŸ‘‹
