<a href="https://colab.research.google.com/github/ekerintaiwoa/MediaApp/blob/master/bookaiagent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q gradio faiss-cpu sentence-transformers openai pymupdf

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m74.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m108.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m90.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import fitz  # PyMuPDF
import gradio as gr
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import openai



In [5]:
# --- Set your OpenAI API key here or via Colab secrets/env ---
OPENAI_API_KEY = ""  #@param {type:"string"} # Add your OpenAI API key here

# Prefer the form value; otherwise fall back to the OPENAI_API_KEY environment
# variable so the key does not have to be saved inside the notebook.
OPENAI_API_KEY = OPENAI_API_KEY.strip() or os.environ.get("OPENAI_API_KEY", "").strip()

# Reject both a missing key and the documentation placeholder; failing here is
# clearer than an authentication error on the first API call.
if not OPENAI_API_KEY or OPENAI_API_KEY == "sk-abcdefabcdefabcdefabcdefabcdefabcdef12":
    raise ValueError("Please provide your OpenAI API key in the variable above!")

openai.api_key = OPENAI_API_KEY

In [7]:
 #--- Book Loading and Chunking ---
def load_pdf_text(pdf_file):
    """Extract the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Either a binary file-like object exposing ``read()`` (its
            bytes are parsed in memory), or a filesystem path given as a
            string or as an object whose ``name`` attribute is the path.

    Returns:
        str: The text of all pages joined together without a separator.
    """
    if hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        # Path-like upload: let PyMuPDF open the file from disk.
        doc = fitz.open(getattr(pdf_file, "name", pdf_file))

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)

def chunk_text(text, chunk_size=500, overlap=50):
    """Split ``text`` into fixed-size character windows that overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so the
    last ``overlap`` characters of one chunk are repeated at the start of the
    next. The final chunk may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        list[str]: The chunks in document order (empty for empty ``text``).

    Raises:
        ValueError: If the sizes would make the window stand still or move
            backwards (which previously caused an endless loop) or skip text.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]


In [8]:
# --- Vector Store with SentenceTransformer and FAISS ---
class BookVectorStore:
    """In-memory similarity index over a book's text chunks.

    Chunks are embedded with the ``all-MiniLM-L6-v2`` SentenceTransformer and
    stored in a FAISS ``IndexFlatL2``; queries are embedded with the same
    model and matched against that index.
    """

    def __init__(self):
        self.embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.index = None  # FAISS index, created by build_index()
        self.text_chunks = []  # chunk strings, position-aligned with the index rows

    def build_index(self, text_chunks):
        """Embed ``text_chunks`` and (re)build the FAISS index from them.

        Args:
            text_chunks: Non-empty sequence of strings to index.

        Raises:
            ValueError: If ``text_chunks`` is empty, since there would be no
                embedding dimension to size the index with.
        """
        text_chunks = list(text_chunks)
        if not text_chunks:
            raise ValueError("Cannot build an index from an empty list of text chunks.")

        embeddings = np.asarray(
            self.embedder.encode(text_chunks, show_progress_bar=True),
            dtype='float32',
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Publish both attributes only after the index was built successfully,
        # so a failed rebuild cannot leave chunks and index out of sync.
        self.index = index
        self.text_chunks = text_chunks

    def query(self, query_text, top_k=5):
        """Return up to ``top_k`` stored chunks most similar to ``query_text``.

        Args:
            query_text: The text to search for.
            top_k: Maximum number of chunks to return.

        Returns:
            list[str]: Matching chunks in the order returned by the index
            search; shorter than ``top_k`` when fewer chunks are stored.

        Raises:
            RuntimeError: If no index has been built yet.
        """
        if self.index is None or not self.text_chunks:
            raise RuntimeError("The index is empty; call build_index() first.")

        top_k = min(top_k, len(self.text_chunks))
        if top_k <= 0:
            return []

        q_emb = np.asarray(self.embedder.encode([query_text]), dtype='float32')
        _distances, indices = self.index.search(q_emb, top_k)
        # FAISS pads missing results with -1, which would otherwise silently
        # select the last chunk through negative indexing.
        return [self.text_chunks[i] for i in indices[0] if i >= 0]

# Shared by the global-based UI helpers below: upload_book() assigns it, and
# ask_question()/flashcards()/summary() treat a falsy value as "no book loaded".
vector_store = None  # Global vector store instance

# --- OpenAI Completion ---
def openai_completion(prompt, max_tokens=256, model="gpt-3.5-turbo-instruct"):
    """Send ``prompt`` to the OpenAI completions endpoint and return the text.

    Args:
        prompt: The full prompt string.
        max_tokens: Upper bound on the number of generated tokens.
        model: Completion model to use. The default replaces the retired
            ``text-davinci-003``.

    Returns:
        str: The first completion, stripped of surrounding whitespace.
    """
    # openai>=1.0 exposes `openai.completions`; the old `openai.Completion`
    # resource only works on the pre-1.0 SDK, so pick whichever is usable.
    if hasattr(openai, "completions"):
        create = openai.completions.create
    else:
        create = openai.Completion.create

    response = create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=0.7,
        n=1,
    )
    return (response.choices[0].text or "").strip()

# --- Query the book ---
def ask_book_question(query, vector_store):
    """Answer ``query`` from the three best-matching excerpts of the book.

    The excerpts returned by ``vector_store.query`` are joined with blank
    lines, embedded in a prompt together with the question, and sent to
    ``openai_completion``; its reply is returned unchanged.
    """
    context = "\n\n".join(vector_store.query(query, top_k=3))
    return openai_completion(
        f"Answer the question based on the following excerpts from a book:\n\n{context}\n\nQuestion: {query}\nAnswer:"
    )

# --- Generate learning aids ---
def generate_flashcards(text, count=5):
    """Ask the model for ``count`` question-answer flashcards based on ``text``."""
    return openai_completion(
        f"Create {count} simple question-answer flashcards from the following text:\n\n{text}\n\nFlashcards:"
    )

def generate_summary(text):
    """Ask the model for a bullet-point summary of ``text``."""
    return openai_completion(
        f"Summarize the following text into bullet points:\n\n{text}\n\nSummary:"
    )

# --- Gradio UI functions ---
def upload_book(file):
    """Load an uploaded PDF or UTF-8 text book into the global vector store.

    Args:
        file: The upload as delivered by the UI: a filesystem path string, an
            object whose ``name`` attribute is the path, or ``None`` when
            nothing was uploaded.

    Returns:
        str: A status message for the UI.
    """
    global vector_store
    if file is None:
        return "Please upload a book first."

    # Always read through the on-disk path: it is available whether the
    # upload arrives as a path string or as a temp-file wrapper.
    path = getattr(file, "name", file)
    with open(path, "rb") as handle:
        if str(path).lower().endswith('.pdf'):
            text = load_pdf_text(handle)
        else:
            text = handle.read().decode('utf-8')

    if not text.strip():
        return "No readable text found in the uploaded file."

    store = BookVectorStore()
    store.build_index(chunk_text(text))
    # Replace the global only once the new index is complete, so a failed
    # upload keeps the previously loaded book usable.
    vector_store = store
    return "Book loaded successfully! You can now ask questions."

def ask_question(query):
    """Answer ``query`` against the globally loaded book, if there is one."""
    if vector_store:
        return ask_book_question(query, vector_store)
    return "Please upload a book first."

def flashcards():
    """Generate flashcards from the first three chunks of the loaded book."""
    if not vector_store:
        return "Please upload a book first."
    opening_chunks = vector_store.text_chunks[:3]
    return generate_flashcards("\n".join(opening_chunks))

def summary():
    """Summarize the first three chunks of the loaded book."""
    if vector_store:
        opening_chunks = vector_store.text_chunks[:3]
        return generate_summary("\n".join(opening_chunks))
    return "Please upload a book first."


In [11]:
with gr.Blocks() as demo:
    gr.Markdown("# 📘 AI Book Assistant (Google Colab Version)")

    # Holds the BookVectorStore between events; handlers receive and return it
    # through their inputs/outputs instead of using a module-level global.
    vector_state = gr.State()

    # UI components
    book_file = gr.File(label="Upload Book (PDF or TXT)")
    upload_status = gr.Textbox(label="Upload Status", interactive=False)
    question_input = gr.Textbox(label="Ask a Question About the Book")
    answer_output = gr.Textbox(label="Answer", interactive=False)
    flashcards_output = gr.Textbox(label="Flashcards", interactive=False)
    summary_output = gr.Textbox(label="Summary", interactive=False)

    upload_btn = gr.Button("📤 Load Book")
    ask_btn = gr.Button("❓ Ask")
    flashcards_btn = gr.Button("🧠 Generate Flashcards")
    summary_btn = gr.Button("📝 Generate Summary")

    # Functions using state
    def upload_book_with_state(file):
        """Index the uploaded book and return ``(status message, store)``.

        ``file`` may be a path string, an object whose ``name`` attribute is
        the path, or ``None`` when the button is pressed without an upload.
        The store is ``None`` whenever no index could be built.
        """
        if file is None:
            return "⚠️ Please upload a book first.", None

        # Read through the on-disk path so both path-string uploads and
        # temp-file wrappers are handled the same way.
        path = getattr(file, "name", file)
        with open(path, "rb") as handle:
            if str(path).lower().endswith(".pdf"):
                text = load_pdf_text(handle)
            else:
                text = handle.read().decode("utf-8")

        if not text.strip():
            return "⚠️ No readable text found in the uploaded file.", None

        store = BookVectorStore()
        store.build_index(chunk_text(text))
        return "✅ Book uploaded and processed.", store

    def ask_question_with_state(query, store):
        """Answer ``query`` from the book held in ``store``."""
        if not store:
            return "⚠️ Please upload a book first."
        return ask_book_question(query, store)

    def flashcards_with_state(store):
        """Generate flashcards from the first three chunks held in ``store``."""
        if not store:
            return "⚠️ Please upload a book first."
        return generate_flashcards("\n".join(store.text_chunks[:3]))

    def summary_with_state(store):
        """Summarize the first three chunks held in ``store``."""
        if not store:
            return "⚠️ Please upload a book first."
        return generate_summary("\n".join(store.text_chunks[:3]))

    # Event wiring: connect each button to its handler and output component.
    upload_btn.click(upload_book_with_state, inputs=book_file, outputs=[upload_status, vector_state])
    ask_btn.click(ask_question_with_state, inputs=[question_input, vector_state], outputs=answer_output)
    flashcards_btn.click(flashcards_with_state, inputs=vector_state, outputs=flashcards_output)
    summary_btn.click(summary_with_state, inputs=vector_state, outputs=summary_output)

demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0bd3d7a0be05a7bc49.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Working Colab version

In [16]:
!pip install gradio langchain pypdf faiss-cpu sentence-transformers openai langchain-community

import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from pypdf import PdfReader
import os

# Load OpenAI API key (or replace with other LLM if needed).
# The key is never written into the notebook: an existing OPENAI_API_KEY
# environment variable is used as-is, otherwise it is prompted for without echo.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

class BookVectorStore:
    """Keeps a book's text chunks together with a LangChain FAISS store built from them."""

    def __init__(self):
        # Nothing is indexed until build_index() is called.
        self.text_chunks = []
        self.vector_store = None

    def build_index(self, text_chunks):
        """Remember ``text_chunks`` and build a FAISS store over them using HuggingFaceEmbeddings."""
        self.text_chunks = text_chunks
        self.vector_store = FAISS.from_texts(text_chunks, HuggingFaceEmbeddings())

    def search(self, query):
        """Return the result of the store's ``similarity_search`` for ``query``."""
        return self.vector_store.similarity_search(query)

def load_pdf_text(file_obj):
    """Extract the text of every page of a PDF on disk.

    Args:
        file_obj: A filesystem path string, or an object whose ``name``
            attribute is the path of the PDF.

    Returns:
        str: The text of all pages concatenated in page order; pages without
        extractable text contribute an empty string.

    Raises:
        ValueError: If the PDF cannot be opened or parsed. The error is raised
            rather than returned so the message can never be mistaken for
            book content and indexed.
    """
    path = getattr(file_obj, "name", file_obj)
    try:
        reader = PdfReader(path)
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        raise ValueError(f"Error reading PDF: {str(e)}") from e

def chunk_text(text, chunk_size=1000, chunk_overlap=150):
    """Split ``text`` into chunks with a RecursiveCharacterTextSplitter.

    Args:
        text: The string to split.
        chunk_size: Passed to the splitter as ``chunk_size``.
        chunk_overlap: Passed to the splitter as ``chunk_overlap``.

    Returns:
        The list of chunks produced by ``split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

def ask_book_question(question, store):
    """Answer ``question`` by running a "stuff" QA chain over the documents found in ``store``."""
    matches = store.search(question)
    qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
    return qa_chain.run(input_documents=matches, question=question)

def generate_flashcards(text):
    """Prompt the LLM for five flashcards derived from ``text`` and return its reply."""
    model = OpenAI()
    prompt = f"Generate 5 flashcards from the following text:\n\n{text}"
    return model(prompt)

def generate_summary(text):
    """Prompt the LLM for a summary of ``text`` and return its reply."""
    model = OpenAI()
    prompt = f"Summarize this text:\n\n{text}"
    return model(prompt)

# Gradio app with shared state
with gr.Blocks() as demo:
    gr.Markdown("## 📘 AI Book Assistant (Colab Ready)")

    # Carries the BookVectorStore from the upload handler to the other handlers.
    vector_state = gr.State()

    book_file = gr.File(label="Upload Book (PDF)")
    upload_status = gr.Textbox(label="Upload Status")

    question_input = gr.Textbox(label="Ask a Question About the Book")
    answer_output = gr.Textbox(label="Answer")

    flashcards_output = gr.Textbox(label="Flashcards")
    summary_output = gr.Textbox(label="Summary")

    upload_btn = gr.Button("📤 Load Book")
    ask_btn = gr.Button("❓ Ask")
    flashcards_btn = gr.Button("🧠 Generate Flashcards")
    summary_btn = gr.Button("📝 Generate Summary")

    # The handlers and the .click() wiring below must stay inside this `with`
    # block: previously they sat after the return statements of the upload
    # handler, so they never ran and the buttons were not connected.
    def upload_book_with_state(file):
        """Index the uploaded PDF and return ``(status message, store)``.

        ``file`` may be a path string, an object whose ``name`` attribute is
        the path, or ``None``. The store is ``None`` whenever indexing did
        not succeed.
        """
        if file is None:
            return "⚠️ No file uploaded.", None

        try:
            # Open by path: it is available both for path-string uploads and
            # for temp-file wrappers, unlike a ``read()`` method.
            pdf_reader = PdfReader(getattr(file, "name", file))
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

            if not text.strip():
                return "⚠️ No readable text found in PDF.", None

            store = BookVectorStore()
            store.build_index(chunk_text(text))
            return "✅ Book processed successfully!", store

        except Exception as e:
            # Report the failure in the status box instead of crashing the event.
            return f"❌ Error reading book: {str(e)}", None

    def ask_question_with_state(query, store):
        """Answer ``query`` from the book held in ``store``."""
        if not store:
            return "⚠️ Please upload a book first."
        return ask_book_question(query, store)

    def flashcards_with_state(store):
        """Generate flashcards from the first three chunks held in ``store``."""
        if not store:
            return "⚠️ Please upload a book first."
        return generate_flashcards("\n".join(store.text_chunks[:3]))

    def summary_with_state(store):
        """Summarize the first three chunks held in ``store``."""
        if not store:
            return "⚠️ Please upload a book first."
        return generate_summary("\n".join(store.text_chunks[:3]))

    # Event wiring: connect each button to its handler and output component.
    upload_btn.click(upload_book_with_state, inputs=book_file, outputs=[upload_status, vector_state])
    ask_btn.click(ask_question_with_state, inputs=[question_input, vector_state], outputs=answer_output)
    flashcards_btn.click(flashcards_with_state, inputs=vector_state, outputs=flashcards_output)
    summary_btn.click(summary_with_state, inputs=vector_state, outputs=summary_output)

demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8003ad0c48f6fa1047.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


