In [None]:
# Attach Google Drive at /content/drive; the PDF path used later in this
# notebook lives under this mount point.
from google.colab import drive

drive.mount("/content/drive")

In [None]:
#Install Required Packages

!pip install PyMuPDF            # For reading and extracting text from PDF
!pip install langchain          # For chunking text, building LLM chains
!pip install sentence-transformers  # For generating sentence embeddings
!pip install faiss-cpu          # For storing and searching text vectors
!pip install groq langchain-groq  # For using Groq LLMs with LangChain
!pip install gTTS              # For converting text to speech

In [None]:
# Extract Text from PDF

#!pip install PyMuPDF

import fitz  # PyMuPDF

pdf_path = "/content/drive/MyDrive/Novel_office/aesops_fables.pdf"

# Open the document with a context manager so it is closed as soon as the text
# has been read, and join the per-page strings in a single pass instead of
# growing one string with repeated `+=`.
with fitz.open(pdf_path) as doc:
    text = "".join(page.get_text() for page in doc)

print(text[:1000])  # Preview first 1000 characters


In [None]:
# Split Text into Chunks

#!pip install langchain

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the extracted text into overlapping chunks (chunk_size=500,
# chunk_overlap=100) so neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)
chunks = text_splitter.split_text(text)

# Sanity check: number of chunks produced and a look at the first two.
print(f"Total chunks: {len(chunks)}")
print(chunks[:2])  # Show first 2 chunks

In [None]:
# !pip install sentence-transformers



In [None]:
from langchain.embeddings import SentenceTransformerEmbeddings

# Embedding model that is handed to FAISS below to vectorize the chunks.
embedding_model = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [None]:
# Store Chunks in FAISS

#!pip install faiss-cpu

from langchain.vectorstores import FAISS

# Embed every chunk with `embedding_model` and index the vectors with FAISS.
vectorstore = FAISS.from_texts(texts=chunks, embedding=embedding_model)

In [None]:
# Set Groq API Key

#!pip install groq langchain-groq

import os
from getpass import getpass

# SECURITY: never hardcode the API key in the notebook; anything saved here is
# readable by everyone who can open the file. A key that has ever been committed
# to source must be treated as leaked and revoked.
# Reuse a key that is already present in the environment; otherwise prompt for
# it without echoing the input or storing it in the notebook.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

In [None]:
# Create Groq LLM Object

from langchain_groq import ChatGroq

# Chat model used for answering, summarizing and simplified explanations.
# Presumably authenticates via the GROQ_API_KEY environment variable set above.
# NOTE(review): confirm that this model id is still served by Groq.
llm = ChatGroq(temperature=0.3, model_name="llama3-8b-8192")  # Groq supported model

In [None]:
# Set Up Retrieval-based QA Chain

from langchain.chains import RetrievalQA

# Retrieval-augmented QA chain: fetch the most similar chunks from the FAISS
# index and let the LLM answer from them. The source documents are returned as
# well because the chat UI reuses them for its follow-up prompts.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(
        search_type="similarity",
        # The number of results has to be passed via `search_kwargs`; a bare
        # `k=3` keyword on `as_retriever` is not forwarded to the search.
        search_kwargs={"k": 3},
    ),
    return_source_documents=True
)

In [None]:
# Ask a sample query (sanity check)

query = "What is the moral of the story 'The Tortoise and the Hare'?"
# Use `invoke` (as the LLM calls in this notebook do) instead of the deprecated
# direct call on the chain; RetrievalQA expects the question under "query".
response = qa_chain.invoke({"query": query})
print("Answer:", response['result'])

In [None]:
# # Chat Interface with Summarize + Loop

# import ipywidgets as widgets
# from IPython.display import display, clear_output

# # Input Box for Questions
# query_input = widgets.Text(
#     value='',
#     placeholder='Ask a question from the document...',
#     description='You:',
#     layout=widgets.Layout(width='100%')
# )

# def run_chat(query):
#     clear_output(wait=True)

#     if query.lower() == "exit":
#         print(" Chat ended.")
#         return  # Stop the loop

#     response = qa_chain(query)
#     print(f" Question: {query}\n")
#     print(" Answer:\n", response['result'])

#     sources = response['source_documents']

#     #  Summarize Button (query-aware)
#     def summarize_story(b):
#         summary_input = f"Summarize only the story related to this question: '{query}'\n\nContent: {sources[0].page_content}"
#         summary = llm.invoke(summary_input)
#         print("\n Summary:\n", summary.content)

#     summarize_button = widgets.Button(description=" Summarize It")
#     summarize_button.on_click(summarize_story)

#     display(query_input, summarize_button)
#     query_input.value = ''  # clear input box after submission

# def on_enter(change):
#     if change['name'] == 'value' and change['new']:
#         run_chat(change['new'])

# query_input.observe(on_enter)
# display(query_input)


In [None]:
# #  Chat Interface with Summarize + Explain Like I'm 5 + Loop

# import ipywidgets as widgets
# from IPython.display import display, clear_output

# #  Input Box for Questions
# query_input = widgets.Text(
#     value='',
#     placeholder='Ask a question from the document...',
#     description='You:',
#     layout=widgets.Layout(width='100%')
# )

# def run_chat(query):
#     clear_output(wait=True)

#     if query.lower() == "exit":
#         print(" Chat ended.")
#         return  # Stop the loop

#     response = qa_chain(query)

#     print(f" Question: {query}\n")
#     print(" Answer:\n", response['result'])

#     sources = response['source_documents']

#     #  Summarize Button
#     def summarize_story(b):
#         summary_input = f"Summarize only the story related to this question: '{query}'\n\nContent: {sources[0].page_content}"
#         summary = llm.invoke(summary_input)
#         print("\n Summary:\n", summary.content)

#     summarize_button = widgets.Button(description=" Summarize It")
#     summarize_button.on_click(summarize_story)

#     #  Explain Like I'm 5 Button
#     def explain_like_5(b):
#         explain_input = f"Explain like I’m 5: {query}\n\nUse this content to help: {sources[0].page_content}"
#         explanation = llm.invoke(explain_input)
#         print("\n Explained Like I’m 5:\n", explanation.content)

#     explain_button = widgets.Button(description=" Explain Like I'm 5")
#     explain_button.on_click(explain_like_5)

#     display(query_input, summarize_button, explain_button)
#     query_input.value = ''  # Clear after each input

# def on_enter(change):
#     if change['name'] == 'value' and change['new']:
#         run_chat(change['new'])

# query_input.observe(on_enter)
# display(query_input)


In [None]:
# !pip install gTTS

In [None]:
# Chat Interface with 3 Sections: Answer + Summarize + Explain Like I'm 5
# Each with its own  Speak button

from gtts import gTTS
from IPython.display import display, Audio, clear_output
import ipywidgets as widgets

#  Text-to-Speech Function
def speak_text(text):
    """Convert ``text`` to speech with gTTS and play it inline.

    The audio is written to ``output.mp3`` in the working directory (overwritten
    on every call) and rendered as an autoplaying audio player.

    Args:
        text: The text to read aloud. ``None``, empty, or whitespace-only input
            is skipped, because gTTS rejects input that contains nothing to
            speak and the error would surface inside a button callback.

    Returns:
        None.
    """
    if not text or not text.strip():
        print(" Nothing to speak.")
        return

    tts = gTTS(text)
    tts.save("output.mp3")
    display(Audio("output.mp3", autoplay=True))

#  Input Box for Questions
query_input = widgets.Text(
    value='',
    placeholder='Ask a question from the document...',
    description='You:',
    # Only update `value` when the text is submitted (Enter / leaving the box).
    # With the default continuous updates the observer registered below would
    # fire - and query the LLM - on every single keystroke.
    continuous_update=False,
    layout=widgets.Layout(width='100%')
)

#  Helper shared by the "Summarize It" and "Explain Like I'm 5" buttons
def _llm_followup_button(label, prompt, heading, speak_label):
    """Build a button that sends ``prompt`` to the LLM and prints the reply.

    Args:
        label: Caption of the returned button.
        prompt: Text passed to ``llm.invoke`` when the button is clicked.
        heading: Text printed in front of the LLM reply.
        speak_label: Caption of the extra button, displayed after the reply,
            that reads the reply aloud via ``speak_text``.

    Returns:
        The configured ``widgets.Button``.
    """
    def on_click(_button):
        reply_text = llm.invoke(prompt).content
        print(heading, reply_text)

        speak_button = widgets.Button(description=speak_label)
        speak_button.on_click(lambda _b: speak_text(reply_text))
        display(speak_button)

    button = widgets.Button(description=label)
    button.on_click(on_click)
    return button


#  Main Chat Function
def run_chat(query):
    """Answer ``query`` with the QA chain and show follow-up action buttons.

    Leading and trailing whitespace is ignored. A blank query is dropped
    (the input box is just reset), and ``exit`` in any letter case ends the
    chat without re-displaying the input box. Otherwise the answer is printed
    together with buttons to speak it, to summarize the first retrieved source
    document, and to explain it in simple terms.

    Args:
        query: The raw text entered by the user.

    Returns:
        None.
    """
    query = query.strip()
    if not query:
        # Nothing to ask: keep the current output and just reset the box.
        query_input.value = ''
        return

    clear_output(wait=True)

    if query.lower() == "exit":
        print(" Chat ended.")
        return

    # `invoke` replaces the deprecated direct call on the chain.
    response = qa_chain.invoke({"query": query})
    answer_text = response['result']
    sources = response['source_documents']

    print(f" Question: {query}\n")
    print(" Answer:\n", answer_text)

    #  Speak Answer Button
    speak_answer_button = widgets.Button(description=" Speak Answer")
    speak_answer_button.on_click(lambda _b: speak_text(answer_text))
    buttons = [speak_answer_button]

    # The follow-up prompts are built from the first retrieved document; offer
    # them only when the retriever actually returned one, so a click can never
    # fail with an IndexError.
    if sources:
        context = sources[0].page_content

        #  Summarize Button + Speaker
        buttons.append(_llm_followup_button(
            label=" Summarize It",
            prompt=f"Summarize only the story related to this question: '{query}'\n\nContent: {context}",
            heading="\n Summary:\n",
            speak_label=" Speak Summary",
        ))

        #  Explain Like I'm 5 Button + Speaker
        buttons.append(_llm_followup_button(
            label=" Explain Like I'm 5",
            prompt=f"Explain like I’m 5: {query}\n\nUse this content to help: {context}",
            heading="\n Explained Like I’m 5:\n",
            speak_label=" Speak Like I'm 5",
        ))

    #  Show All Buttons
    display(query_input, *buttons)
    query_input.value = ''  # Clear after each input

#  Trigger Chat on Enter
def on_enter(change):
    """Forward a non-empty change of the ``value`` trait to ``run_chat``.

    Changes of any other trait, and changes whose new value is empty, are
    ignored.
    """
    if change['name'] != 'value':
        return

    submitted = change['new']
    if not submitted:
        return

    run_chat(submitted)

# Subscribe only to changes of `value` (the one trait `on_enter` acts on)
# instead of every trait of the widget, then render the input box.
query_input.observe(on_enter, names='value')
display(query_input)
