### Install Dependencies

In [1]:
# !pip install langchain
# !pip install langchain-community
# !pip install playwright
# !pip install lxml
# !playwright install
# !pip install beautifulsoup4
# !pip install langchain_openai
# !pip install google-api-python-client
# !pip install langfuse

In [2]:
from langchain_community.agent_toolkits import PlayWrightBrowserToolkit
from langchain_community.tools.playwright.utils import (
    create_async_playwright_browser,  # A synchronous browser is available, though it isn't compatible with jupyter.\n",      },
)

from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.tools import Tool

from langchain.agents.format_scratchpad.openai_tools import (
    format_to_openai_tool_messages,
)
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.agents import AgentExecutor
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import StrOutputParser
import datetime
import os

from bs4 import BeautifulSoup
import asyncio

In [3]:
# Telemetrics for langchain

from langfuse.callback import CallbackHandler
langfuse_handler = CallbackHandler(
    # localhost so does not matter
    secret_key="sk-lf-e95d0d73-1aaa-4ff1-85db-47325731b067",
    public_key="pk-lf-6b155085-85e8-4ca6-8bcb-58dcdc4a7ad8",
    host="http://localhost:3000",
)

In [4]:
# This import is required only for jupyter notebooks, since they have their own eventloop
import nest_asyncio
nest_asyncio.apply()

In [5]:
from langchain_community.tools.playwright.utils import (
    run_async
)

from playwright.async_api import async_playwright
browser = run_async(async_playwright().start())
firefox = await browser.firefox.launch(headless=False, slow_mo=5000)

In [6]:
# browser_toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=firefox)
# browser_tools = browser_toolkit.get_tools()

In [7]:
# Webpage tool

async def visit_webpage_and_get_text(url: str):
    browser = run_async(async_playwright().start())
    firefox = await browser.firefox.launch(headless=False, slow_mo=5000)
    page = await firefox.new_page()
    await page.goto(url)
    content = await page.content()
    text_content = BeautifulSoup(content, 'html.parser').get_text()
    return text_content

view_webpage_tool = Tool.from_function(
    name='view_webpage_tool',
    func=lambda url: asyncio.run(visit_webpage_and_get_text(url)),
    description="Visit a webpage and get the contents of the page. The input is a URL and the output is the text content of the page.",
)


In [8]:
# Search tools

search = GoogleSearchAPIWrapper()

search_tool = Tool(
    name="google_search",
    description="Search Google for recent results. This will return relevant information from the web with links to the sources.",
    func=lambda query: f"""
        Result for query: {query}
        {os.linesep.join([f"Title: {result['title']}  Peek into article {result['snippet']}  Link for more info: {result['link']}" for result in search.results(query, num_results=5)])}
    """,
)

  warn_deprecated(


In [9]:
research_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
        You are a research agent. You will be given a topic and you need to do research on it. You have access to tools that you can use to help you get information while preparing the research report.
        You need to create a detailed research report that covers the topic in detail. First gather all the information you can find on the topic on the web.

        Only use the information from the articles and websites you find. Do not use any information from your own knowledge or experience.

        Return a research report in the markdown format.
        """
    ),
    ("human", "Research {research_topic}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

In [10]:
llm = ChatOpenAI(
    model="gpt-4o",
    max_retries=2,
    temperature=0.25,
)
agent_tools = [search_tool] + [view_webpage_tool]
llm = llm.bind_tools(agent_tools)

In [11]:
def agent_scratchpad_formatter(x):
    return format_to_openai_tool_messages(x["intermediate_steps"])

agent = (
    {
        "research_topic": lambda x: x["research_topic"],
        "agent_scratchpad": agent_scratchpad_formatter,
    }
    | research_prompt
    | llm
    | OpenAIToolsAgentOutputParser() # This gets the output from the OpenAI api call and gets the tool invocations from it. This then is executed by the AgentExecutor.
)

In [12]:
agent_executor = AgentExecutor(agent=agent, tools=agent_tools, verbose=True)

In [13]:
research_topic = "Indian Elections 2024 Result"
research_agent_result = await agent_executor.ainvoke({"research_topic": research_topic}, config={"callbacks": [langfuse_handler]})


with open(f"""output/{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}_research_report.md""", 'w') as file:
    file.write(research_agent_result['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `google_search` with `Indian Elections 2024 Result`


[0m[32;1m[1;3m
Invoking: `google_search` with `Indian Elections 2024 outcome analysis`


[0m[36;1m[1;3m
        Result for query: Indian Elections 2024 outcome analysis
        Title: Decoding India's 2024 Election Contest - Carnegie Endowment for ...  Peek into article Dec 7, 2023 ... India Elects 2024 presents sober, data-driven research and analysis from the world's best young India scholars on the key drivers animating this ...  Link for more info: https://carnegieendowment.org/2023/12/07/decoding-india-s-2024-election-contest-pub-91178
Title: India election results 2024 updates: Modi says he will form next ...  Peek into article Jun 4, 2024 ... Independent candidate Abdul Rasheed Sheikh, popularly known as engineer Rasheed, won the elections from Baramulla in northern Kashmir. He ...  Link for more info: https://www.aljazeera.com/news/liveblog/2024/6/

RateLimitError: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-KQrhbeSVwqVovaiWSmMB4kCR on tokens per min (TPM): Limit 30000, Requested 102818. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}

received error response: {'message': 'Invalid request data', 'errors': ['Expected object, received string']}
Received 400 error by Langfuse server, not retrying: {'message': 'Invalid request data', 'errors': ['Expected object, received string']}


In [None]:
# for step in agent_executor.iter({"input": "India Election 2024 results"}):
#     if output := step.get("intermediate_step"):
#         action, value = output[0]
#         # Ask user if they want to continue
#         _continue = input("Should the agent continue (Y/n)?:\n") or "Y"
#         if _continue.lower() != "y":
#             break