In [1]:
!pip install langgraph tavily-python langchain-community langchain_anthropic

Collecting langchain-community
  Downloading langchain_community-0.2.7-py3-none-any.whl (2.2 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m:01[0m0m
[?25hCollecting langchain_anthropic
  Downloading langchain_anthropic-0.1.20-py3-none-any.whl (20 kB)
Collecting SQLAlchemy<3,>=1.4
  Downloading SQLAlchemy-2.0.31-cp311-cp311-macosx_11_0_arm64.whl (2.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hCollecting aiohttp<4.0.0,>=3.8.3
  Using cached aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl (390 kB)
Collecting dataclasses-json<0.7,>=0.5.7
  Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting langchain<0.3.0,>=0.2.7
  Downloading langchain-0.2.7-py3-none-any.whl (983 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
import os
# Set the LANGCHAIN_TRACING_V2 flag for this kernel process before any chain is built.
os.environ.update(LANGCHAIN_TRACING_V2="true")

In [2]:
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [81]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Literal
from langchain_anthropic import ChatAnthropic
import json
from langchain_community.tools.tavily_search import TavilySearchResults
from typing import TypedDict, Optional
from langgraph.graph import StateGraph, END

In [82]:
# Search tool, registered under the name "Search" that the agent prompt advertises.
search_tool = TavilySearchResults(
    name="Search",
)

In [83]:
# Chat model without any tools bound; used directly for website note-taking
# and for the structured-output checker.
raw_model = ChatAnthropic(
    model_name="claude-3-5-sonnet-20240620",
)

In [92]:
# Schema of the facts the agent must collect about a company.
# Documented with comments instead of a class docstring so that the output of
# `Info.schema_json()` (which is interpolated into the prompts) stays unchanged.
class Info(BaseModel):
    # Where the company is based.
    headquarters: str = Field(
        description='City in which this company is headquartered, should be in the form "San Francisco, USA"',
    )
    # Head count; the only field typed as optional.
    number_employees: Optional[int] = Field(
        description="number of full time employees",
    )
    # Investors, as a single comma separated string.
    investors: str = Field(
        description="Notable investors, should be a comma separated list",
    )
    # Funding total, as a short human-readable string.
    capital_raised: str = Field(
        description="amount of money raised, should be like: `None`, `10k`, `35m`, `1b` etc",
    )
    # Chief executive's name.
    ceo: str = Field(
        description="Name of CEO",
    )
    # Chief executive's college.
    ceo_college: str = Field(
        description="Where did the CEO go to college",
    )

In [93]:
# Template for the agent's leading prompt.
# Placeholders:
#   {info}         - filled with `Info.schema_json()` by the callers in this notebook
#   {company_name} - name of the company being researched
# NOTE(review): the `ScrapeWebsite` bullet says "This will update the notes above",
# but the template contains no notes section — confirm whether one was intended.
main_prompt = """You are doing research on companies. You are trying to figure out this information:

<info>
{info}
</info>


You have access to the following tools:

- `Search`: call a search tool and get back some results
- `ScrapeWebsite`: scrape a website and get relevant notes about the company. This will update the notes above.
- `Info`: call this when you are done and have gathered all the relevant info

Here is the information you have about the company you are researching:

Company Name: {company_name}"""

In [94]:
# Render the agent prompt once for a sample company (handy for eyeballing the template).
p = main_prompt.format(
    info=Info.schema_json(),
    company_name="LangChain",
)

In [95]:
# Template for the note-taking prompt sent to the model after a page has been loaded.
# Placeholders:
#   {info}    - filled with `Info.schema_json()`
#   {url}     - address of the page that was loaded
#   {content} - text content of that page
info_prompt = """You are doing research on companies. You are trying to figure out this information:

<info>
{info}
</info>

You just scraped the following website: {url}

Based on the website content below, jot down some notes about the website.

{content}"""
# Tool: load a web page and let the un-bound model take notes on it.
#
# The function is spelled `ScrapeWebsite` so that a tool name derived from the
# function name matches the `ScrapeWebsite` tool advertised in `main_prompt`.
# The original misspelling `ScapeWebsite` is kept below as an alias, so existing
# references keep working.
#
# Args:
#     url: address of the page to load.
# Returns:
#     Whatever `raw_model.invoke` returns for the filled-in `info_prompt`.
# Raises:
#     ValueError: if the loader returns no documents for `url`.
#
# The docstring is deliberately left as the original one-liner; tool wrappers
# presumably surface it to the model as the tool description — verify before editing.
def ScrapeWebsite(url: str):
    """Used to scrape a website"""
    docs = WebBaseLoader(url).load()
    if not docs:
        # Clearer than the bare IndexError that `docs[0]` would raise.
        raise ValueError(f"No content could be loaded from {url}")
    # Only the first loaded document is used for note-taking.
    website = docs[0].page_content
    prompt = info_prompt.format(info=Info.schema_json(), url=url, content=website)
    return raw_model.invoke(prompt)


# Backward-compatible alias for the original (misspelled) name.
ScapeWebsite = ScrapeWebsite

In [96]:
from langgraph.graph import StateGraph, MessagesState, END
from langgraph.prebuilt import ToolNode
from langchain_core.messages import ToolMessage

# State carried through the graph: everything inherited from MessagesState
# (the nodes below read and write its `messages` entry) plus the two fields here.
class GraphState(MessagesState):
    # Company being researched; supplied by the caller in the `graph.invoke` input.
    company_name: str
    # Accepted result; written by `call_checker`, and its presence ends the run.
    info: Info

In [97]:
# Structured verdict requested from the checker via `with_structured_output(Good)`.
class Good(BaseModel):
    # The checker's reasoning for its verdict.
    reason: str
    # True when the proposed info is accepted as the final answer.
    good: bool

In [98]:
# Tool-calling model used by the agent node.
# `call_model` previously relied on a `model` name that no cell in this notebook
# defines, so it failed with NameError on a fresh kernel. The tools bound here are
# the three advertised in `main_prompt`: Search, the website scraper, and Info.
model = raw_model.bind_tools([search_tool, ScapeWebsite, Info])


def call_model(state):
    """Agent node: ask the tool-calling model for its next step.

    Args:
        state: graph state; ``company_name`` and ``messages`` are read.

    Returns:
        A state update whose ``messages`` entry holds the model's reply.
    """
    prompt = main_prompt.format(info=Info.schema_json(), company_name=state['company_name'])
    # The task prompt is rebuilt and prepended on every turn; it is not stored in state.
    messages = [{"role": "human", "content": prompt}] + state['messages']
    return {"messages": [model.invoke(messages)]}


def call_checker(state):
    """Checker node: review a pending ``Info`` tool call before accepting it.

    The conversation so far (minus the pending tool call) is shown to the
    un-bound model together with the proposed arguments, and a ``Good``
    verdict is requested.

    Args:
        state: graph state; ``company_name`` and ``messages`` are read. The
            last message must carry at least one tool call.

    Returns:
        ``{"info": Info(...)}`` when the verdict is good and the arguments
        validate; otherwise a ``messages`` update containing a ToolMessage
        (the verdict, or the validation error) answering the tool call.
    """
    task_prompt = main_prompt.format(info=Info.schema_json(), company_name=state['company_name'])
    # Drop the final message: it is the tool call under review.
    conversation = [{"role": "human", "content": task_prompt}, *state['messages'][:-1]]

    tool_call = state['messages'][-1].tool_calls[0]
    presumed_info = tool_call['args']
    question = f"I am thinking of calling the info tool with the info below. Is this good? Give your reasoning as well. You can encourage the Assistant to look at specific URLs if that seems relevant, or do more searches.\n\n{presumed_info}"
    conversation.append({"role": "human", "content": question})

    verdict = raw_model.with_structured_output(Good).invoke(conversation)

    if not verdict.good:
        # Rejected: hand the checker's verdict back to the agent as the tool result.
        rejection = ToolMessage(tool_call_id=tool_call['id'], content=str(verdict), artifact=verdict)
        return {"messages": [rejection]}

    try:
        return {"info": Info(**presumed_info)}
    except Exception as e:
        # Accepted by the checker but not constructible as Info: report the error instead.
        failure = ToolMessage(tool_call_id=tool_call['id'], content=f"Invalid response: {e}")
        return {"messages": [failure]}

# Node that runs the search and website-scraping tools; `Info` calls are routed
# to the checker instead and are therefore not registered here.
tool_node = ToolNode(
    [search_tool, ScapeWebsite],
)

def bad_agent(state):
    """Remind the model of the one-tool-per-turn rule.

    Reached when the previous model turn did not contain exactly one tool
    call. The incoming ``state`` is not inspected.

    Returns:
        A ``messages`` update holding a single user-role reminder.
    """
    reminder = {"content": "You must call one, and only one, tool!", "role": "user"}
    return {"messages": [reminder]}

def route_after_agent(state):
    """Pick the node that follows the agent's turn.

    Args:
        state: graph state; only the last entry of ``messages`` is inspected.

    Returns:
        ``"bad_agent"`` unless the last message has exactly one tool call;
        ``"call_checker"`` when that call is named ``Info``; ``"tool_node"``
        for any other tool.
    """
    tool_calls = state['messages'][-1].tool_calls
    if len(tool_calls) != 1:
        return "bad_agent"

    (only_call,) = tool_calls
    return "call_checker" if only_call['name'] == "Info" else "tool_node"


def route_after_checker(state):
    """Finish once the checker has stored a result, otherwise return to the agent.

    Returns:
        ``END`` when the state contains an ``info`` key, else ``"call_model"``.
    """
    return END if 'info' in state else "call_model"

# Assemble the workflow on a builder, then compile it into the runnable `graph`.
builder = StateGraph(GraphState)

# Nodes: the three functions are registered under their own names.
builder.add_node(call_model)
builder.add_node(call_checker)
builder.add_node(bad_agent)
builder.add_node("tool_node", tool_node)

# Every run starts with the agent, and both tool results and reminders go back to it.
builder.set_entry_point("call_model")
builder.add_edge("tool_node", "call_model")
builder.add_edge("bad_agent", "call_model")

# Branching is decided by the router functions.
builder.add_conditional_edges("call_model", route_after_agent)
builder.add_conditional_edges("call_checker", route_after_checker)

graph = builder.compile()

In [99]:
# Run the agent for one company; the final state is displayed as the cell output.
graph.invoke(
    {"company_name": "LangChain"},
)

{'messages': [AIMessage(content=[{'text': "Certainly! I'll research LangChain and gather the required information using the available tools. Let's start with a search query to get some initial information about the company.", 'type': 'text'}, {'id': 'toolu_016kiWe4nKZj3hLsMJ728KGD', 'input': {'query': 'LangChain company information headquarters investors funding CEO'}, 'name': 'Search', 'type': 'tool_use'}], response_metadata={'id': 'msg_01Vqj55jS2jjDzjfQnucqks6', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens': 974, 'output_tokens': 96}}, id='run-44dfc675-a09f-44dc-9cd2-c0c2874eda85-0', tool_calls=[{'name': 'Search', 'args': {'query': 'LangChain company information headquarters investors funding CEO'}, 'id': 'toolu_016kiWe4nKZj3hLsMJ728KGD', 'type': 'tool_call'}], usage_metadata={'input_tokens': 974, 'output_tokens': 96, 'total_tokens': 1070}),
  ToolMessage(content='[{"url": "https://pitchbook.com/profiles/company/5229