<a href="https://colab.research.google.com/github/afifasulthana1234/afifa/blob/main/studymate1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install faiss-cpu sentence-transformers PyMuPDF reportlab transformers ipywidgets


In [None]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Generate a small single-page PDF so the rest of the notebook has a document
# to extract, chunk and index.
pdf_filename = "sample_study.pdf"

# Page content: one list entry per rendered line; empty strings give blank lines.
lines = [
    "The History of Artificial Intelligence",
    "",
    "Artificial Intelligence (AI) is the simulation of human intelligence in machines.",
    "Key Milestones:",
    "- 1956: The term 'Artificial Intelligence' was first coined at the Dartmouth Conference.",
    "- 1997: IBM's Deep Blue defeated world chess champion Garry Kasparov.",
    "- 2011: IBM Watson won the quiz show Jeopardy! against human champions.",
    "- 2023: Large Language Models like ChatGPT and Mistral became widely used.",
    "",
    "Applications of AI include NLP, computer vision, robotics, healthcare, finance, and education.",
]

# Letter-sized canvas; the font is set before the text object is created.
pdf_canvas = canvas.Canvas(pdf_filename, pagesize=letter)
pdf_canvas.setFont("Times-Roman", 12)

# Begin the text block at (100, 750) and write every entry on its own line.
body_text = pdf_canvas.beginText(100, 750)
for study_line in lines:
    body_text.textLine(study_line)

# Draw the accumulated text onto the page and write the file to disk.
pdf_canvas.drawText(body_text)
pdf_canvas.save()

print("✅ Dummy study PDF created:", pdf_filename)


In [None]:
import fitz
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Extract text
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        One string holding each page's ``get_text()`` output in page order,
        with nothing inserted between pages.
    """
    # Opening the document as a context manager guarantees it is closed again,
    # even when reading one of the pages raises.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

def chunk_text(text, chunk_size=300, overlap=50):
    """Split text into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split. Words are whatever ``str.split()`` yields.
        chunk_size: Maximum number of words per chunk; must be at least 1.
        overlap: Number of words each chunk shares with the one before it;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunks, each the chunk's words joined by single spaces.
        Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. An
            overlap that is not smaller than the chunk size would keep the
            window from ever advancing.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a chunk reaches the last word, any further window would only
        # repeat words already covered by this chunk, so stop here.
        if start + chunk_size >= len(words):
            break
    return chunks

# Extract the sample PDF's text and split it into overlapping word chunks.
pdf_text = extract_text_from_pdf("sample_study.pdf")
chunks = chunk_text(pdf_text)
print("✅ Number of chunks:", len(chunks))

# With no chunks there is nothing to embed or search, so stop here with a
# clear message instead of failing later in the index-building steps.
if not chunks:
    raise ValueError("No text could be extracted from the PDF; nothing to index.")

# Build FAISS
# Embed every chunk, then store the vectors in a flat L2 index whose
# dimensionality is taken from the embedding matrix itself.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(chunks, convert_to_numpy=True)

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print("✅ FAISS index ready with", index.ntotal, "chunks")


In [None]:
from transformers import pipeline
import ipywidgets as widgets
from IPython.display import display, Markdown

# Text-to-text generation pipeline used to produce the answers.
qa_model = pipeline("text2text-generation", model="google/flan-t5-base")

# Text input box
# continuous_update=False makes the widget publish its value only when the
# user submits (Enter) or leaves the field, so an observer on `value` runs once
# per question instead of once per keystroke.
question_box = widgets.Text(
    value="",
    placeholder="Type your question here",
    description="Question:",
    continuous_update=False,
    layout=widgets.Layout(width="100%")
)

# Area that receives the rendered answer and references.
output_box = widgets.Output()

def answer_question(change):
    """Answer the question carried by a widget change event.

    Clears ``output_box``, looks up the chunks closest to the question in the
    FAISS ``index``, asks ``qa_model`` to answer from that context, and renders
    the answer followed by the first 200 characters of each retrieved chunk.
    A blank question only clears the output.

    Args:
        change: Change dictionary from the observed widget; only the ``"new"``
            entry (the question text) is read.
    """
    output_box.clear_output()
    query = change["new"]
    if query.strip() == "":
        return

    q_emb = embedder.encode([query], convert_to_numpy=True)

    # Ask for at most as many neighbours as the index holds. FAISS fills the
    # missing result slots with -1, and chunks[-1] would silently return the
    # last chunk, duplicating it in the context and in the references.
    top_k = max(1, min(3, index.ntotal))
    _, indices = index.search(q_emb, top_k)
    retrieved = [chunks[i] for i in indices[0] if i >= 0]
    context = "\n".join(retrieved)

    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
    answer = qa_model(prompt, max_length=200)[0]["generated_text"]

    with output_box:
        display(Markdown(f"### 📖 Answer:\n{answer}"))
        print("\n📌 References:")
        for ref in retrieved:
            # Only a 200-character preview of each chunk is shown.
            print("-", ref[:200], "...")

# Call answer_question whenever the text box's `value` trait changes, then
# render the input box with the output area beneath it.
question_box.observe(answer_question, names="value")
display(question_box, output_box)


In [None]:
import ipywidgets as widgets
from IPython.display import display, Markdown

# Free-text field for the question; takes 70% of the available width.
question_box = widgets.Text(
    description="Question:",
    placeholder="Type your question here",
    value="",
    layout=widgets.Layout(width="70%"),
)

# Button that submits the current question.
ask_button = widgets.Button(
    description="Ask",
    icon="search",
    tooltip="Click to get answer",
    button_style="info",
)

# Container that collects the rendered questions, answers and references.
output_box = widgets.Output()

def on_button_click(b):
    """Answer the question currently in ``question_box`` and append it to the output.

    Reads and strips the text box value, looks up the closest chunks in the
    FAISS ``index``, asks ``qa_model`` to answer from that context, and writes
    the question, the answer and a 200-character preview of each retrieved
    chunk into ``output_box``. The text box is emptied afterwards. A blank
    question does nothing.

    Args:
        b: The argument supplied by the button's click callback; not used.
    """
    query = question_box.value.strip()
    if query == "":
        return

    # Search in FAISS
    q_emb = embedder.encode([query], convert_to_numpy=True)

    # Ask for at most as many neighbours as the index holds. FAISS fills the
    # missing result slots with -1, and chunks[-1] would silently return the
    # last chunk, duplicating it in the context and in the references.
    top_k = max(1, min(3, index.ntotal))
    _, indices = index.search(q_emb, top_k)
    retrieved = [chunks[i] for i in indices[0] if i >= 0]
    context = "\n".join(retrieved)

    # Generate answer
    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
    answer = qa_model(prompt, max_length=200)[0]["generated_text"]

    # Show answer (the output area is not cleared, so earlier answers remain)
    with output_box:
        display(Markdown(f"### ❓ Question: {query}"))
        display(Markdown(f"### 📖 Answer:\n{answer}"))
        print("\n📌 References:")
        for ref in retrieved:
            print("-", ref[:200], "...")

    # Reset question box for next input
    question_box.value = ""

# Run on_button_click each time the Ask button is clicked.
ask_button.on_click(on_button_click)

# Display UI: question field, then the button, then the output area.
display(question_box, ask_button, output_box)
