In [7]:
!pip install transformers torch gradio pymupdf langchain sentence-transformers faiss-cpu --quiet

import gradio as gr
import fitz  # PyMuPDF
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# 🔹 Load IBM Granite model
# Identifier handed to both `from_pretrained` calls below.
model_id = "ibm-granite/granite-3.3-2b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the weights first, then move them to the GPU when one is available (CPU otherwise).
model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# 🔹 PDF text extraction
def extract_text(pdf_file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_file: Whatever `fitz.open` accepts as its first argument
            (the UI passes the uploaded file entries here).

    Returns:
        str: The per-page text in page order, separated by "\n".
    """
    # Use the document as a context manager so it is closed even when a page
    # fails to parse; the original left the handle open.
    with fitz.open(pdf_file) as doc:
        return "\n".join(page.get_text() for page in doc)

# 🔹 Build vector store
# Embedding models already constructed in this kernel, keyed by model name.
_EMBEDDINGS_CACHE = {}


def _get_embeddings(model_name):
    """Return the embedding model for `model_name`, constructing it on first use only."""
    if model_name not in _EMBEDDINGS_CACHE:
        _EMBEDDINGS_CACHE[model_name] = HuggingFaceEmbeddings(model_name=model_name)
    return _EMBEDDINGS_CACHE[model_name]


def build_vectorstore(texts, chunk_size=500, chunk_overlap=50,
                      embedding_model="sentence-transformers/all-MiniLM-L6-v2"):
    """Split the given texts into chunks and index them in a FAISS vector store.

    Args:
        texts: Iterable of document strings; empty or whitespace-only entries
            are skipped.
        chunk_size: Value forwarded to the splitter's `chunk_size`.
        chunk_overlap: Value forwarded to the splitter's `chunk_overlap`.
        embedding_model: Model name handed to `HuggingFaceEmbeddings`.

    Returns:
        The store produced by `FAISS.from_texts` for the collected chunks.

    Raises:
        ValueError: If no chunk could be produced (for example every text is
            empty), instead of letting the indexing call fail on an empty list.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = []
    for text in texts:
        if text and text.strip():
            chunks.extend(splitter.split_text(text))
    # Check before touching the embedding model so the failure is cheap and clear.
    if not chunks:
        raise ValueError("No text chunks to index: the supplied documents contain no extractable text.")
    # The embedding model is reused across calls rather than rebuilt per question.
    return FAISS.from_texts(chunks, _get_embeddings(embedding_model))

# 🔹 Ask question using IBM Granite
# (question, answer) pairs; module-level, so every caller appends to the same list.
chat_history = []


def ask_question(pdfs, question):
    """Answer `question` from the content of `pdfs` and record the exchange.

    The PDFs are converted to text, indexed, and the three chunks most similar
    to the question are placed in the prompt sent to the model.

    Args:
        pdfs: Iterable of PDF sources, each passed to `extract_text`.
        question: The user's question.

    Returns:
        tuple: `(answer, chat_history)` where `chat_history` is the module-level
        list after the new `(question, answer)` pair has been appended.
    """
    texts = [extract_text(pdf) for pdf in pdfs]
    # PDFs without any text layer would otherwise fail while building the index.
    if not any(text and text.strip() for text in texts):
        answer = "No extractable text was found in the uploaded PDFs."
        chat_history.append((question, answer))
        return answer, chat_history

    vectorstore = build_vectorstore(texts)
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)

    messages = [
        {"role": "user", "content": f"""You are StudyMate, an academic assistant. Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:"""}
    ]
    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=300)
    # Keep only the newly generated tokens, and drop special tokens so that
    # end-of-sequence markers do not show up in the displayed answer.
    prompt_len = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    chat_history.append((question, answer))
    return answer, chat_history

# 🔹 Gradio UI
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 📘 StudyMate\nYour AI-powered academic assistant")

    # Inputs: the PDFs to search and the question to answer.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")
        question_input = gr.Textbox(label="Ask a question", placeholder="e.g. What is the main idea of Chapter 3?")

    # Trigger button next to the read-only answer box.
    with gr.Row():
        submit_btn = gr.Button("Get Answer", variant="primary")
        answer_output = gr.Textbox(label="Answer", lines=10, interactive=False)

    chatbox = gr.Chatbot(label="Chat History")

    def handle_query(pdfs, question):
        """Validate the form values, then delegate to `ask_question`.

        Returns the text for the answer box and the history for the chatbot.
        When no PDF was uploaded or the question is blank, a prompt to fill in
        the form is returned and the history is left unchanged.
        """
        # Without this guard `pdfs` can be None, which fails as soon as it is iterated.
        if not pdfs or not (question or "").strip():
            return "Please upload PDFs and ask a question.", chat_history
        answer, history = ask_question(pdfs, question)
        return answer, history

    submit_btn.click(fn=handle_query, inputs=[pdf_input, question_input], outputs=[answer_output, chatbox])

# share=True asks Gradio for a publicly reachable link in addition to the local one.
demo.launch(share=True)


ModuleNotFoundError: Module langchain_community.embeddings not found. Please install langchain-community to access this module. You can install it using `pip install -U langchain-community`

In [8]:
pip install transformers torch gradio pymupdf langchain sentence-transformers faiss-cpu --quiet

import gradio as gr
import fitz  # PyMuPDF
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# 🔹 Load IBM Granite model
# Identifier handed to both `from_pretrained` calls below.
model_id = "ibm-granite/granite-3.3-2b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the weights first, then move them to the GPU when one is available (CPU otherwise).
model = AutoModelForCausalLM.from_pretrained(model_id)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# 🔹 PDF text extraction
def extract_text(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Whatever `fitz.open` accepts as its first argument
            (the UI passes the uploaded file entries here).

    Returns:
        str: The per-page text in page order, separated by "\n".
    """
    # Use the document as a context manager so it is closed even when a page
    # fails to parse; the original left the handle open.
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)

# 🔹 Build vector store
# Embedding models already constructed in this kernel, keyed by model name.
_EMBEDDINGS_CACHE = {}


def _get_embeddings(model_name):
    """Return the embedding model for `model_name`, constructing it on first use only."""
    if model_name not in _EMBEDDINGS_CACHE:
        _EMBEDDINGS_CACHE[model_name] = HuggingFaceEmbeddings(model_name=model_name)
    return _EMBEDDINGS_CACHE[model_name]


def build_vectorstore(texts, chunk_size=500, chunk_overlap=50,
                      embedding_model="sentence-transformers/all-MiniLM-L6-v2"):
    """Split the given texts into chunks and index them in a FAISS vector store.

    Args:
        texts: Iterable of document strings; empty or whitespace-only entries
            are skipped.
        chunk_size: Value forwarded to the splitter's `chunk_size`.
        chunk_overlap: Value forwarded to the splitter's `chunk_overlap`.
        embedding_model: Model name handed to `HuggingFaceEmbeddings`.

    Returns:
        The store produced by `FAISS.from_texts` for the collected chunks.

    Raises:
        ValueError: If no chunk could be produced (for example every text is
            empty), instead of letting the indexing call fail on an empty list.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = []
    for text in texts:
        if text and text.strip():
            chunks.extend(splitter.split_text(text))
    # Check before touching the embedding model so the failure is cheap and clear.
    if not chunks:
        raise ValueError("No text chunks to index: the supplied documents contain no extractable text.")
    # The embedding model is reused across calls rather than rebuilt per question.
    return FAISS.from_texts(chunks, _get_embeddings(embedding_model))

# 🔹 Ask question using IBM Granite
# (question, answer) pairs; module-level, so every caller appends to the same list.
chat_history = []


def ask_question(pdfs, question):
    """Answer `question` from the content of `pdfs` and record the exchange.

    The PDFs are converted to text, indexed, and the three chunks most similar
    to the question are placed in the prompt sent to the model.

    Args:
        pdfs: Iterable of PDF sources, each passed to `extract_text`.
        question: The user's question.

    Returns:
        tuple: `(answer, chat_history)` where `chat_history` is the module-level
        list after the new `(question, answer)` pair has been appended.
    """
    texts = [extract_text(pdf) for pdf in pdfs]
    # PDFs without any text layer would otherwise fail while building the index.
    if not any(text and text.strip() for text in texts):
        answer = "No extractable text was found in the uploaded PDFs."
        chat_history.append((question, answer))
        return answer, chat_history

    vectorstore = build_vectorstore(texts)
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = f"""You are StudyMate, an academic assistant.
Use the context below to answer the question.

Context:
{context}

Question: {question}
Answer:"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=300)
    # Slice off the prompt by token count. Removing it with str.replace only
    # works when the decoded text reproduces the prompt byte-for-byte, which
    # decoding does not guarantee; on a mismatch the whole prompt would be
    # echoed back as part of the answer.
    prompt_len = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    chat_history.append((question, answer))
    return answer, chat_history

# 🔹 Gradio UI
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 📘 StudyMate\nYour AI-powered academic assistant")

    # Inputs: the PDFs to search and the question to answer.
    with gr.Row():
        pdf_input = gr.File(
            label="Upload PDFs",
            file_types=[".pdf"],
            file_count="multiple",
        )
        question_input = gr.Textbox(
            label="Ask a question",
            placeholder="e.g. What is the main idea of Chapter 3?",
        )

    # Trigger button next to the read-only answer box.
    with gr.Row():
        submit_btn = gr.Button("Get Answer", variant="primary")
        answer_output = gr.Textbox(label="Answer", lines=10, interactive=False)

    chatbox = gr.Chatbot(label="Chat History")

    def handle_query(pdfs, question):
        """Run `ask_question` when both form fields are filled in.

        Returns the text for the answer box and the history for the chatbot;
        with a missing upload or a blank question, a reminder message is
        returned together with the unchanged history.
        """
        if pdfs and question.strip():
            return ask_question(pdfs, question)
        return "Please upload PDFs and ask a question.", chat_history

    submit_btn.click(
        fn=handle_query,
        inputs=[pdf_input, question_input],
        outputs=[answer_output, chatbox],
    )

demo.launch(share=True)

SyntaxError: invalid syntax (ipython-input-3382005982.py, line 1)