In [45]:
# !pip install --quiet -U langgraph
!pip install --upgrade --quiet  wikipedia


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [34]:
import os

In [35]:
from langchain.agents import create_openai_functions_agent
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.tools import Tool

from langchain import hub
from langchain.agents import create_openai_functions_agent

from bs4 import BeautifulSoup
import asyncio

In [36]:
# Telemetrics for langchain

from langfuse.callback import CallbackHandler
langfuse_handler = CallbackHandler(
    # localhost so does not matter
    secret_key="sk-lf-84b00334-152c-4290-bbad-de905088d121",
    public_key="pk-lf-dc2ee1e0-04f6-485f-8a29-58912f561c1c",
    host="http://localhost:3000",
)

In [37]:
# This import is required only for jupyter notebooks, since they have their own eventloop
import nest_asyncio
nest_asyncio.apply()

In [38]:
from langchain_community.tools.playwright.utils import (
    run_async
)

from playwright.async_api import async_playwright
browser = run_async(async_playwright().start())
firefox = await browser.firefox.launch(headless=False, slow_mo=5000)

In [39]:
# Webpage tool

async def visit_webpage_and_get_text(url: str):
    browser = run_async(async_playwright().start())
    firefox = await browser.firefox.launch(headless=False, slow_mo=5000)
    page = await firefox.new_page()
    await page.goto(url)
    content = await page.content()
    text_content = BeautifulSoup(content, 'html.parser').get_text()
    return text_content

view_webpage_tool = Tool.from_function(
    name='view_webpage_tool',
    func=lambda url: asyncio.run(visit_webpage_and_get_text(url)),
    description="Visit a webpage and get the contents of the page. The input is a URL and the output is the text content of the page.",
)


In [40]:
search = GoogleSearchAPIWrapper()

search_tool = Tool(
    name="google_search",
    description="Search Google for recent results. This will return relevant information from the web with links to the sources.",
    func=lambda query: f"""
        Result for query: {query}
        {os.linesep.join([f"Title: {result['title']}  Peek into article {result['snippet']}  Link for more info: {result['link']}" for result in search.results(query, num_results=5)])}
    """,
)

tools=[search_tool, view_webpage_tool]

prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
        You are a research agent. You will be given a topic and you need to do research on it. You have access to tools that you can use to help you get information while preparing the research report.
        You need to create a detailed research report that covers the topic in detail. First gather all the information you can find on the topic on the web.
        """
    ),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "Research {research_topic}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

llm = ChatOpenAI(
    model="gpt-4o",
    max_retries=2,
    temperature=0.25,
)

agent_runnable = create_openai_functions_agent(llm, tools, prompt)

In [41]:
import operator
from typing import Annotated, TypedDict, Union

from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.messages import BaseMessage


class AgentState(TypedDict):
    # The input string
    research_topic: str
    # The list of previous messages in the conversation
    chat_history: list[BaseMessage]
    # The outcome of a given call to the agent
    # Needs `None` as a valid type, since this is what this will start as
    agent_outcome: Union[AgentAction, AgentFinish, None]
    # List of actions and corresponding observations
    # Here we annotate this with `operator.add` to indicate that operations to
    # this state should be ADDED to the existing values (not overwrite it)
    intermediate_steps: Annotated[list[tuple[AgentAction, str]], operator.add]

In [42]:
from langchain_core.agents import AgentFinish

from langgraph.prebuilt.tool_executor import ToolExecutor

# This a helper class we have that is useful for running tools
# It takes in an agent action and calls that tool and returns the result
tool_executor = ToolExecutor(tools)


# Define the agent
def run_agent(data):
    print("faiz here", data)
    agent_outcome = agent_runnable.invoke(data)
    return {"agent_outcome": agent_outcome}


# Define the function to execute tools
def execute_tools(data):
    print("faiz execute_tools", data)
    # Get the most recent agent_outcome - this is the key added in the `agent` above
    agent_action = data["agent_outcome"]
    output = tool_executor.invoke(agent_action)
    return {"intermediate_steps": [(agent_action, str(output))]}


# Define logic that will be used to determine which conditional edge to go down
def should_continue(data):
    # If the agent outcome is an AgentFinish, then we return `exit` string
    # This will be used when setting up the graph to define the flow
    if isinstance(data["agent_outcome"], AgentFinish):
        return "end"
    # Otherwise, an AgentAction is returned
    # Here we return `continue` string
    # This will be used when setting up the graph to define the flow
    else:
        return "continue"

In [43]:
from langgraph.graph import END, StateGraph, START

# Define a new graph
workflow = StateGraph(AgentState)

# Define the two nodes we will cycle between
workflow.add_node("agent", run_agent)
workflow.add_node("action", execute_tools)

# Set the entrypoint as `agent`
# This means that this node is the first one called
workflow.add_edge(START, "agent")

# We now add a conditional edge
workflow.add_conditional_edges(
    # First, we define the start node. We use `agent`.
    # This means these are the edges taken after the `agent` node is called.
    "agent",
    # Next, we pass in the function that will determine which node is called next.
    should_continue,
    # Finally we pass in a mapping.
    # The keys are strings, and the values are other nodes.
    # END is a special node marking that the graph should finish.
    # What will happen is we will call `should_continue`, and then the output of that
    # will be matched against the keys in this mapping.
    # Based on which one it matches, that node will then be called.
    {
        # If `tools`, then we call the tool node.
        "continue": "action",
        # Otherwise we finish.
        "end": END,
    },
)

# We now add a normal edge from `tools` to `agent`.
# This means that after `tools` is called, `agent` node is called next.
workflow.add_edge("action", "agent")

# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
app = workflow.compile()

In [44]:
input = {"research_topic": "India 2024 lok sabha elections", "chat_history": [], "intermediate_steps": [], "agent_outcome": None}
app.invoke(input=input, config={"callbacks": [langfuse_handler]})

faiz here {'research_topic': 'India 2024 lok sabha elections', 'chat_history': [], 'agent_outcome': None, 'intermediate_steps': []}
faiz execute_tools {'research_topic': 'India 2024 lok sabha elections', 'chat_history': [], 'agent_outcome': AgentActionMessageLog(tool='google_search', tool_input='India 2024 Lok Sabha elections', log='\nInvoking: `google_search` with `India 2024 Lok Sabha elections`\n\n\n', message_log=[AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{"__arg1":"India 2024 Lok Sabha elections"}', 'name': 'google_search'}}, response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 198, 'total_tokens': 220}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_dd932ca5d1', 'finish_reason': 'function_call', 'logprobs': None}, id='run-755f532a-ba46-43a3-a140-a92d72fa4366-0', usage_metadata={'input_tokens': 198, 'output_tokens': 22, 'total_tokens': 220})]), 'intermediate_steps': []}
faiz here {'research_topic': 'India 2024 lo

RateLimitError: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-KQrhbeSVwqVovaiWSmMB4kCR on tokens per min (TPM): Limit 30000, Requested 37106. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}

received error response: {'message': 'Invalid request data', 'errors': ['Expected object, received string']}
Received 400 error by Langfuse server, not retrying: {'message': 'Invalid request data', 'errors': ['Expected object, received string']}


In [14]:
app.get_graph().draw_mermaid_png(output_file_path="graph.png")

b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xe2\x01\xd8ICC_PROFILE\x00\x01\x01\x00\x00\x01\xc8\x00\x00\x00\x00\x040\x00\x00mntrRGB XYZ \x07\xe0\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00acsp\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tdesc\x00\x00\x00\xf0\x00\x00\x00$rXYZ\x00\x00\x01\x14\x00\x00\x00\x14gXYZ\x00\x00\x01(\x00\x00\x00\x14bXYZ\x00\x00\x01<\x00\x00\x00\x14wtpt\x00\x00\x01P\x00\x00\x00\x14rTRC\x00\x00\x01d\x00\x00\x00(gTRC\x00\x00\x01d\x00\x00\x00(bTRC\x00\x00\x01d\x00\x00\x00(cprt\x00\x00\x01\x8c\x00\x00\x00<mluc\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x0cenUS\x00\x00\x00\x08\x00\x00\x00\x1c\x00s\x00R\x00G\x00BXYZ \x00\x00\x00\x00