In [1]:
# 🔧 Step 1: Install dependencies
!pip install transformers sentence-transformers faiss-cpu pymupdf gradio --quiet

# 📚 Step 2: Imports
import os
import fitz  # PyMuPDF
import torch
import faiss
import gradio as gr

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# 🧠 Sentence-embedding model: vectorises both PDF chunks and user questions
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# 🧠 IBM Granite instruct model + tokenizer, placed on the GPU when one exists
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.3-2b-instruct")
model = AutoModelForCausalLM.from_pretrained("ibm-granite/granite-3.3-2b-instruct")
model.to(device)

# 📄 Chunking parameters (both measured in characters)
chunk_size, overlap = 500, 100

# 📄 Shared state, filled in by process_pdfs() and read by ask_question()
docs_chunks = []         # text chunks of every processed PDF
chunk_embeddings = None  # embedding matrix matching docs_chunks
index = None             # FAISS index built over chunk_embeddings


# 📄 Step 3: Text extraction and chunking from PDF
def extract_chunks_from_pdf(file):
    """Extract the text of one PDF and split it into overlapping chunks.

    Args:
        file: The PDF to read. Three forms are accepted:
            * a filesystem path (``str`` / ``os.PathLike``) -- presumably what
              recent Gradio versions hand over for ``gr.File`` uploads
              (NOTE(review): confirm against the installed Gradio version);
            * a binary file-like object exposing ``.read()``;
            * any other object exposing the path through ``.name``.

    Returns:
        list[str]: Whitespace-stripped, non-empty chunks. Each chunk is cut
        from a window of ``chunk_size`` characters and consecutive windows
        start ``chunk_size - overlap`` characters apart.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``chunk_size``; the
            window could never advance.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    # Pick the opening mode that matches the kind of object we were given.
    if isinstance(file, (str, os.PathLike)):
        document = fitz.open(file)
    elif hasattr(file, "read"):
        document = fitz.open(stream=file.read(), filetype="pdf")
    else:
        document = fitz.open(file.name)

    with document as doc:
        # One trailing newline per page keeps text from adjacent pages apart.
        text = "".join(page.get_text() + "\n" for page in doc)

    # Slide a fixed-size window over the text; drop windows that are only
    # whitespace so they never reach the embedding model.
    windows = (
        text[start:start + chunk_size].strip()
        for start in range(0, len(text), step)
    )
    return [window for window in windows if window]


# 🔍 Step 4: Index all chunks using FAISS
def build_faiss_index(chunks):
    """Embed ``chunks`` and load the vectors into a fresh flat L2 FAISS index.

    Args:
        chunks: Text chunks to embed with the module-level ``embedding_model``.

    Returns:
        tuple: ``(index, embeddings)`` -- the populated ``faiss.IndexFlatL2``
        and the embedding matrix that was added to it.
    """
    vectors = embedding_model.encode(chunks)
    dimension = vectors.shape[1]  # the index is sized to the embedding width

    flat_index = faiss.IndexFlatL2(dimension)
    flat_index.add(vectors)
    return flat_index, vectors


# 🧠 Step 5: Answer generation using IBM Granite
def generate_answer(context, question):
    """Ask the Granite model to answer ``question`` using ``context``.

    Args:
        context: Retrieved text passages placed in the user prompt.
        question: The question to answer.

    Returns:
        str: The decoded completion only (prompt tokens removed), stripped of
        surrounding whitespace.
    """
    system_prompt = "You are a helpful academic assistant that answers questions using the provided context."
    user_prompt = f"Context:\n{context}\n\nQuestion: {question}"
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    encoded = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    encoded = encoded.to(device)

    # generate() returns prompt + completion; remember where the prompt ends
    # so only the newly generated tokens are decoded.
    prompt_length = encoded["input_ids"].shape[-1]
    generated = model.generate(**encoded, max_new_tokens=256)
    completion_ids = generated[0][prompt_length:]

    answer = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return answer.strip()


# 💬 Step 6: Full Q&A pipeline
def ask_question(question):
    """Answer ``question`` from the most relevant indexed PDF chunks.

    Args:
        question: The user's question as plain text.

    Returns:
        str: The generated answer, or a short instruction when no PDF has
        been processed yet or the question is blank.
    """
    # Test the index with `is None`: the truth value of a multi-element
    # NumPy embedding matrix is ambiguous and raises ValueError.
    if not docs_chunks or index is None:
        return "Please upload and process at least one PDF first."

    if not question or not question.strip():
        return "Please enter a question."

    # Embed the question
    question_embedding = embedding_model.encode([question])

    # Search the top 3 relevant chunks, but never request more neighbours
    # than there are chunks: FAISS pads missing results with -1, which would
    # silently select docs_chunks[-1].
    k = min(3, len(docs_chunks))
    _, neighbor_ids = index.search(question_embedding, k=k)
    top_chunks = [
        docs_chunks[i] for i in neighbor_ids[0] if 0 <= i < len(docs_chunks)
    ]
    context = "\n---\n".join(top_chunks)

    # Generate answer
    return generate_answer(context, question)


# 🗂️ Step 7: PDF upload handler
def process_pdfs(files):
    """Extract, chunk and index the uploaded PDFs, replacing the previous index.

    Args:
        files: The uploaded PDFs as delivered by the upload component; may be
            ``None`` or empty when nothing was uploaded.

    Returns:
        str: A status message for the UI -- a success summary, or a warning
        when there was nothing to process or no text could be extracted.

    Side effects:
        On success, rebinds the module-level ``docs_chunks``,
        ``chunk_embeddings`` and ``index``. On any warning path the previous
        state is left untouched.
    """
    global docs_chunks, chunk_embeddings, index

    if not files:
        return "⚠️ Please upload at least one PDF before processing."

    all_chunks = []
    for file in files:
        all_chunks.extend(extract_chunks_from_pdf(file))

    # Image-only or empty PDFs yield no text; indexing an empty list would
    # fail when the index is sized from the embedding matrix.
    if not all_chunks:
        return "⚠️ No extractable text was found in the uploaded PDF(s)."

    # Build first, publish afterwards, so a failure while embedding cannot
    # leave the globals half-updated.
    new_index, new_embeddings = build_faiss_index(all_chunks)
    docs_chunks = all_chunks
    index, chunk_embeddings = new_index, new_embeddings
    return f"✅ {len(docs_chunks)} chunks extracted and indexed from {len(files)} PDF(s). You can now ask your questions."


# 🎨 Step 8: Gradio UI
# Layout: title and description, an upload row, a status box, then a Q&A row.
# Components appear in the page in the order they are created below.
with gr.Blocks() as demo:
    gr.Markdown("# 📘 StudyMate: AI-Powered PDF-Based Q&A System for Students")
    gr.Markdown("Upload your textbooks, lecture notes, or papers, then ask questions and get direct, contextual answers.")

    # Upload row: multi-file PDF picker next to the button that triggers indexing.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
        process_button = gr.Button("📄 Process PDFs")

    # Shows the message returned by process_pdfs().
    status_output = gr.Textbox(label="Status")

    # Q&A row: question on the left, generated answer on the right.
    with gr.Row():
        question_input = gr.Textbox(label="Ask a Question", placeholder="E.g. What is Newton's second law?")
        answer_output = gr.Textbox(label="Answer")

    # Event wiring: the button click indexes the uploaded PDFs; the question
    # textbox's submit event runs the retrieval + generation pipeline.
    process_button.click(fn=process_pdfs, inputs=[pdf_input], outputs=[status_output])
    question_input.submit(fn=ask_question, inputs=[question_input], outputs=[answer_output])

# 🚀 Step 9: Launch app
demo.launch()


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25h

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f9c87492d6fae2a2f3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


