In [1]:
# =========================
# 1. Install dependencies
# =========================
# Notebook shell escape (leading `!`): runs pip to install the packages
# imported by the cells below. This line is not plain-Python syntax.
!pip install transformers accelerate torch PyPDF2 gradio

# =========================
# 2. Imports
# =========================
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch, PyPDF2, gradio as gr

# =========================
# 3. Load Granite model
# =========================
model_name = "ibm-granite/granite-3.3-2b-instruct"

# Load the tokenizer and the weights exactly once. Half precision is used only
# when a CUDA device is present; otherwise the model stays in float32 on CPU.
# NOTE: recent transformers releases emit a deprecation warning for
# `torch_dtype` (renamed to `dtype`); the old keyword is kept here so the cell
# still runs on older releases.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
).to("cuda" if torch.cuda.is_available() else "cpu")

# Build the text-generation pipeline around the objects loaded above instead
# of passing the checkpoint name, which would download/instantiate a second
# full copy of the model and double memory use.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=model.device,
)

print("✅ Granite model loaded successfully!")

# =========================
# 4. PDF Loader
# =========================
def load_pdf(pdf_file):
    """Return the extracted text of every page in a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (the caller in this file passes a file path).

    Returns:
        A single string made of each page's extracted text followed by a
        newline, in page order. An empty document yields ``""``.
    """
    document = PyPDF2.PdfReader(pdf_file)
    # One newline-terminated segment per page, concatenated in page order.
    return "".join(page.extract_text() + "\n" for page in document.pages)

# =========================
# 5. Chunking PDF text
# =========================
def chunk_text(text, max_tokens=800):
    """Split *text* into consecutive chunks of at most ``max_tokens`` words.

    "Tokens" here are whitespace-separated words (``str.split``), not model
    tokens. Words inside a chunk are re-joined with single spaces, so the
    original whitespace/newlines are not preserved.

    Args:
        text: The text to split.
        max_tokens: Number of words after which a chunk is closed.

    Returns:
        A list of chunk strings in document order; empty if *text* contains
        no words. The final chunk may be shorter than ``max_tokens``.
    """
    pieces = []
    bucket = []
    for token in text.split():
        bucket.append(token)
        # Keep filling until the bucket reaches the size limit.
        if len(bucket) < max_tokens:
            continue
        pieces.append(" ".join(bucket))
        bucket = []

    # Flush whatever is left over as a final, shorter chunk.
    if bucket:
        pieces.append(" ".join(bucket))
    return pieces

# =========================
# 6. QA Function
# =========================
pdf_text_cache = ""  # global cache to hold PDF text


def process_pdf(pdf_file):
    """Load the uploaded PDF into the module-level text cache.

    Used as the handler for the upload component's ``change`` event, so it is
    also invoked when the user clears the upload; in that case the value is
    ``None`` and the cache is reset instead of crashing on ``None.name``.

    Args:
        pdf_file: The uploaded file as delivered by the UI: ``None`` when
            nothing is selected, otherwise an object exposing the path via
            ``.name`` or (assumed, depending on the Gradio version/config —
            verify) a plain path string.

    Returns:
        A human-readable status message for the status textbox.
    """
    global pdf_text_cache

    if pdf_file is None:
        # Upload was cleared: drop stale text so later questions are not
        # answered from a document that is no longer shown in the UI.
        pdf_text_cache = ""
        return "⚠️ No PDF selected. Please upload a PDF."

    # Accept both "object with .name" and "path string" inputs.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    pdf_text_cache = load_pdf(pdf_path)

    if not pdf_text_cache.strip():
        # Only per-page newlines came back (e.g. image-only pages): there is
        # nothing to answer from, so do not report success.
        pdf_text_cache = ""
        return "⚠️ No extractable text was found in this PDF."

    return "📘 PDF loaded successfully! You can now ask questions."

def _keyword_set(text):
    """Return the set of lowercase alphanumeric words found in *text*."""
    normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return set(normalized.split())


def _select_chunk(chunks, question):
    """Return the chunk sharing the most distinct words with *question*."""
    terms = _keyword_set(question)
    # max() keeps the first of equally scored chunks, so when nothing
    # overlaps this falls back to the first chunk.
    return max(chunks, key=lambda chunk: len(terms & _keyword_set(chunk)))


def ask_question(question, use_pipeline=True):
    """Answer *question* using the most relevant chunk of the cached PDF text.

    Args:
        question: The user's question.
        use_pipeline: When true, generate with the module-level ``pipe``;
            otherwise build a chat-template prompt and call
            ``model.generate`` directly.

    Returns:
        The generated answer text, or a warning message when the question is
        blank, no PDF has been loaded, or the cached text contains no words.
    """
    question = (question or "").strip()
    if not question:
        return "⚠️ Please enter a question."
    if not pdf_text_cache:
        return "⚠️ Please upload a PDF first."

    # Split PDF into chunks
    chunks = chunk_text(pdf_text_cache, max_tokens=600)
    if not chunks:
        # Whitespace-only cache: max() over an empty list would raise.
        return "⚠️ No readable text was found in the uploaded PDF."

    # Rank chunks by word overlap with the question. Requiring the whole
    # question to appear verbatim in a chunk almost never matches, which
    # made the selection always return the first chunk.
    selected = _select_chunk(chunks, question)

    # Prompt
    prompt = f"""You are an AI assistant verifying medical prescriptions and answering student questions.
    Context (from PDF study material or prescription):
    {selected}

    Question: {question}
    Answer:"""

    if use_pipeline:
        # return_full_text=False keeps only the newly generated continuation;
        # by default the pipeline echoes the whole prompt (including the PDF
        # context) in front of the answer.
        response = pipe(
            prompt,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            return_full_text=False,
        )
        return response[0]["generated_text"].strip()

    messages = [
        {"role": "system", "content": "You are an AI tutor and prescription verifier."},
        {"role": "user", "content": prompt},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=300)
    # Decode only the tokens produced after the prompt.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# =========================
# 7. Gradio UI
# =========================
# Page layout: a title, an upload/status row, then a question/answer row.
with gr.Blocks() as demo:
    gr.Markdown(value="## 📘 AI Medical Prescription Verification & PDF Q&A (Granite-powered)")

    # Row 1: PDF picker next to a read-only status box.
    with gr.Row():
        pdf_upload = gr.File(file_types=[".pdf"], label="Upload your PDF")
        status = gr.Textbox(interactive=False, label="Status")

    # Row 2: question input next to a read-only answer box.
    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        answer = gr.Textbox(interactive=False, label="Answer")

    # Event wiring: a change of the uploaded file refreshes the status text,
    # and submitting the question box fills the answer box.
    pdf_upload.change(fn=process_pdf, inputs=[pdf_upload], outputs=[status])
    question.submit(fn=ask_question, inputs=[question], outputs=[answer])

demo.launch()



Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 232.6/232.6 kB 4.1 MB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

Device set to use cpu
`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Granite model loaded successfully!
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d0ae076eae8aacc269.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


