In [133]:
# !pip install PyMuPDF

In [134]:
import pymupdf
import datetime

from langchain_core.tools import Tool

from langchain.agents.format_scratchpad.openai_tools import (
    format_to_openai_tool_messages,
)
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.agents import AgentExecutor
from langchain_openai import ChatOpenAI

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [135]:
from langfuse.callback import CallbackHandler
langfuse_handler = CallbackHandler(
    # localhost so does not matter
    secret_key="sk-lf-c650de69-a630-4c5a-b5bc-93a9745f663d",
    public_key="pk-lf-344d1105-8258-461a-bbc8-5cb5330389a9",
    host="http://localhost:3000",
)

In [136]:
doc = None

In [137]:
llm = ChatOpenAI(
    model="gpt-4o",
    max_retries=2,
    temperature=0.75,
)

In [138]:
def open_pdf(pdf_name):
    global doc
    doc = pymupdf.open(pdf_name)
    return f"Opened {pdf_name} pdf file for analysis. The document has {len(doc)} pages."

open_pdf_tool = Tool.from_function(
    name='open_pdf_tool',
    func=open_pdf,
    description="Opens a PDF file for analysis. Takes in the pdf file name. We need to open a pdf before we can ask information about it.",
)

In [139]:
get_page_details_tool = Tool.from_function(
    name='get_page_details_tool',
    func=lambda page_number: doc[int(page_number)].get_text(),
    description="Gets the text from a page in the pdf. Takes in the page number and returns the text in the page. We need to open the pdf using open_pdf_tool before we can ask information about it.",
)

In [140]:
tools = [open_pdf_tool, get_page_details_tool]

In [141]:
pdf_analyzer_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "Tell me all the tools that you have access to"
        # """
        # You are a research agent. You will be given a pdf file. This file can only be read by the tools provided to you. The pdf file has multiple pages 
        # with information in it. You need to read the file and create a brief summary of the information in the file.

        # Do not ask for the pdf file. You can access the pdf using tools provided to you.

        # Return a result in the markdown format.

        #  Return a research report in the markdown format.
        # """
    ),
    ("human", "Pdf file: {pdf_file}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

In [142]:
def agent_scratchpad_formatter(x):
    return format_to_openai_tool_messages(x["intermediate_steps"])

llm.bind_tools(tools)

agent = (
    {
        "pdf_file": lambda x: x["pdf_file"],
        "agent_scratchpad": agent_scratchpad_formatter,
    }
    | pdf_analyzer_prompt
    | llm
    | OpenAIToolsAgentOutputParser()
)

In [143]:
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True,)

In [144]:
pdf_file = "tata_motors_report.pdf"
research_agent_result = await agent_executor.ainvoke({"pdf_file": pdf_file}, config={"callbacks": [langfuse_handler]})


with open(f"""output/{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}_research_report.md""", 'w') as file:
    file.write(research_agent_result['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI currently don't have the ability to browse files or access PDFs. However, if you can provide text or specific details from the "tata_motors_report.pdf," I can help you analyze the information or answer questions related to it.[0m

[1m> Finished chain.[0m
