In [1]:

#Import the required libraries
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI



The OpenAI API key is required

In [2]:
#Set your OpenAI API key
os.environ["OPENAI_API_KEY"] = ""

This loads the PDF with the info which will be used. this uses the PyPDFLoader

In [3]:
# Step  Load PDF
loader = PyPDFLoader("sample_annual_report.pdf")
documents = loader.load()

This code is splitting a list of documents into smaller text chunks to make them easier to process (e.g., for embedding or retrieval tasks in NLP).

In [4]:
# Split into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

This code embeds text chunks and stores them in a vector database ChromaDB for retrieval:

In [5]:
# Embed and store with ChromaDB
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embedding=embeddings)
retriever = db.as_retriever()

This code sets up a Question-Answering (QA) system using a language model and a retriever:

ChatOpenAI(...): Initializes the OpenAI language model (gpt-3.5-turbo) for answering questions.

RetrievalQA.from_chain_type(...): Creates a retrieval-augmented QA system that:

Uses the retriever to find relevant text chunks, Feeds them to the LLM to generate accurate answers based on context.



In [6]:
# Set up QA chain
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


In [7]:
#  Test Ask a question
question = "Give me the key metrics and a summary in 100 words."
response = qa_chain.run(question)

print("Response:\n", response)

  response = qa_chain.run(question)


ðŸ§  Response:
 Key Financial Metrics for 2024:
- Total Revenue: $150,000,000
- Net Profit: $32,500,000
- Operating Margin: 21.7%
- Earnings Per Share (EPS): $2.45
- Total Employees: 1,250

Summary:
In 2024, the company achieved significant growth with a 15% increase in revenue driven by strategic investments in technology and international markets. The company reported a total revenue of $150 million, a net profit of $32.5 million, an operating margin of 21.7%, and earnings per share of $2.45. With a positive outlook for 2025, the company plans to continue its growth trajectory by expanding into the Asia-Pacific region and investing further in AI to enhance operational efficiency.


In [8]:
#  Step 6: Add Gradio UI for interactive Q&A with prompt options

def ask_question_gradio(prompt_choice, user_question):
    # Define prompt templates
    prompt_templates = {
        "Summary": "Please provide a concise summary of the document.",
        "Extract Metrics": "Extract all key numerical metrics from the document.",
        "Key Insights": "What are the main insights and takeaways?"
    }

    # Use the selected prompt as a prefix + user question
    if user_question.strip():
        final_query = f"{prompt_templates[prompt_choice]} Question: {user_question}"
    else:
        final_query = prompt_templates[prompt_choice]

    return qa_chain.run(final_query)

with gr.Blocks() as demo:
    gr.Markdown("## Chat with sample_annual_report.pdf")

    with gr.Row():
        prompt_dropdown = gr.Dropdown(
            choices=["Summary", "Extract Metrics", "Key Insights"],
            value="Summary",
            label="Select Prompt Type"
        )
        user_input = gr.Textbox(label="Ask a question (optional)")

    submit_btn = gr.Button("Ask")
    answer_output = gr.Textbox(label="Answer", lines=10)

    submit_btn.click(fn=ask_question_gradio, inputs=[prompt_dropdown, user_input], outputs=[answer_output])

demo.launch(inline=True)  # inline=True for Jupyter Notebook


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


