In [1]:
!pip install transformers torch gradio pymupdf langchain sentence-transformers faiss-cpu langchain-community requests==2.32.4 --quiet

import gradio as gr
import fitz  # PyMuPDF
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# 🔹 Load IBM Granite model
# The weights are moved to the GPU when CUDA is available; otherwise they
# stay on the CPU.
model_id = "ibm-granite/granite-3.3-2b-instruct"
_device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.to(_device)

def extract_text(pdf):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        pdf: The document to read, given as one of:
            * a filesystem path (``str`` or ``os.PathLike``),
            * an object exposing the path through a ``name`` attribute,
            * a dict holding the path under the ``"name"`` key.

    Returns:
        The text of all pages in page order, separated by a newline.

    Raises:
        ValueError: If ``pdf`` is none of the supported input shapes.
    """
    import os  # local import so the block does not rely on file-level imports

    # Path-like objects must be tested before the generic ``name`` attribute:
    # ``pathlib.Path.name`` is only the final path component, not the path.
    if isinstance(pdf, (str, os.PathLike)):
        path = os.fspath(pdf)
    elif hasattr(pdf, "name"):  # file-like object
        path = pdf.name
    elif isinstance(pdf, dict) and "name" in pdf:
        path = pdf["name"]
    else:
        raise ValueError("Unsupported file format received from Gradio input")

    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(path) as doc:
        return "\n".join(page.get_text() for page in doc)

# 🔹 Build vector store
# Embedding model, created on first use and then reused by every call so the
# sentence-transformer weights are not reloaded for each question.
_embeddings = None

def build_vectorstore(texts):
    """Split the given texts into chunks and index them in a FAISS store.

    Args:
        texts: Iterable of strings (one per document) to index.

    Returns:
        A FAISS vector store built from the 500-character chunks
        (50-character overlap) of all texts.

    Raises:
        ValueError: If splitting the texts produces no chunks at all, e.g.
            because no text could be extracted from the documents.
    """
    global _embeddings

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = [chunk for text in texts for chunk in splitter.split_text(text)]
    if not chunks:
        # Fail with a clear message instead of passing an empty list on to
        # the FAISS index construction.
        raise ValueError("No text could be extracted from the provided documents.")

    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(chunks, _embeddings)

# 🔹 Ask question using IBM Granite
# NOTE: module-level list, so the history is shared by every caller of
# ask_question() for the lifetime of the process.
chat_history = []

def ask_question(pdfs, question):
    """Answer a question using the uploaded PDFs as retrieval context.

    The text of every PDF is extracted and indexed, the three chunks most
    similar to the question are retrieved, and the model generates an answer
    from a prompt containing those chunks.

    Args:
        pdfs: Iterable of PDF inputs in any form accepted by extract_text().
        question: The question to answer.

    Returns:
        A tuple ``(answer, chat_history)`` where ``chat_history`` is the
        module-level list of ``(question, answer)`` pairs, including the
        pair that was just appended.
    """
    # extract_text() already resolves paths, file wrappers and dicts, so the
    # raw input is passed through instead of assuming a ``.name`` attribute.
    texts = [extract_text(pdf) for pdf in pdfs]
    vectorstore = build_vectorstore(texts)
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = f"""You are StudyMate, an academic assistant.
Use the context below to answer the question.

Context:
{context}

Question: {question}
Answer:"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=300)

    # The generated sequence starts with the prompt tokens. Slice them off by
    # length rather than string-replacing the prompt, which only works when
    # the decoded text reproduces the prompt character for character.
    prompt_length = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    chat_history.append((question, answer))
    return answer, chat_history

# 🔹 Gradio UI
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 📘 StudyMate\nYour AI-powered academic assistant")

    with gr.Row():
        pdf_input = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
        question_input = gr.Textbox(label="Ask a question", placeholder="e.g. What is the main idea of Chapter 3?")

    with gr.Row():
        submit_btn = gr.Button("Get Answer", variant="primary")
        answer_output = gr.Textbox(label="Answer", lines=10, interactive=False)

    chatbox = gr.Chatbot(label="Chat History")

    def handle_query(pdfs, question):
        """Validate the form input and run the question through ask_question().

        Args:
            pdfs: Value of the file upload component (empty/None when nothing
                has been uploaded).
            question: Value of the question textbox.

        Returns:
            A tuple ``(answer_text, history)`` feeding the answer textbox and
            the chatbot. When input is missing, a hint is returned together
            with the unchanged history.

        Raises:
            gr.Error: If answering the question fails, so the message is
                shown in the UI.
        """
        # ``question or ""`` keeps the check safe if the textbox yields None.
        if not pdfs or not (question or "").strip():
            return "Please upload PDFs and ask a question.", chat_history
        try:
            answer, history = ask_question(pdfs, question)
        except Exception as exc:
            # UI boundary: re-raise as gr.Error so the user sees the reason
            # instead of a generic error; the original cause stays chained.
            raise gr.Error(f"Could not answer the question: {exc}") from exc
        return answer, history

    submit_btn.click(fn=handle_query, inputs=[pdf_input, question_input], outputs=[answer_output, chatbox])


demo.launch(share=True)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m73.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

  chatbox = gr.Chatbot(label="Chat History")


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7d271f517f6c83bef6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


