In [22]:
# Imports
import os
import getpass
import base64
from typing import TypedDict, Annotated, Sequence

import operator
from typing import Annotated, Any, Dict, List, Optional, Sequence, TypedDict
import functools

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolNode
from langgraph.graph import StateGraph, END
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.tools import GooglePlacesTool
from langchain_core.output_parsers.openai_functions import JsonOutputFunctionsParser

# Set API keys (temp).
# Only prompt for a credential when it is missing (or empty) in the environment,
# so re-running this cell, or running the notebook with the keys exported
# beforehand, neither re-prompts nor overwrites existing values.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
for _env_var, _service in (
    ('LANGCHAIN_API_KEY', 'Langchain'),
    ('TAVILY_API_KEY', 'Tavily'),
    ('OPENAI_API_KEY', 'OpenAI'),
    ('GPLACES_API_KEY', 'Google Places'),
):
    # An empty string is treated the same as an unset variable.
    if not os.environ.get(_env_var):
        os.environ[_env_var] = getpass.getpass(f"Enter your {_service} API key: ")


## Define prompts

In [15]:
# Supervisor prompts. The {members} and {options} placeholders are filled in
# when the supervisor chat prompt is built.
supervisor_sys_prompt = """You are a supervisor tasked with managing a conversation between the following workers:  {members}. Given the following user request,
respond with the worker to act next. Each worker will perform a task and respond with their results and status. When finished, respond with FINISH."""
supervisor_user_prompt = """Given the conversation above, who should act next? Or should we FINISH? Select one of: {options}"""

# Output-format instructions that get embedded in the geolocation prompt.
json_prompt = "Only return a valid json string (RFC 8259). Do not provide any other commentary. Do not wrap the JSON in markdown such as ```json. Only use the data from the provided content."

# Geolocation task prompt. The doubled braces keep the example JSON literal
# when str.format substitutes {json_prompt}.
prompt_template = """USER: Given a set of streetview images from a vehicle, your task is to determine the
coordinates from which the picture was taken. It can be anywhere in the world.

Return json with the city and coordinates following the below example. {json_prompt}
output={{"city": "Orland Park, IL, 60467, USA", "latitude": "42.0099", "longitude": "-87.62317"}}

AGENT: output="""

# Final text sent alongside the images.
text_prompt = prompt_template.format(json_prompt=json_prompt)

## Helper Utilities

In [16]:
def create_agent(llm: ChatOpenAI, tools: list, system_prompt: str):
    """Build an executor for a tool-using worker agent.

    Args:
        llm: Chat model that drives the agent.
        tools: Tools the agent is allowed to call.
        system_prompt: System message describing the worker's role.

    Returns:
        An ``AgentExecutor`` wrapping an OpenAI-tools agent built from the
        given model, tools and prompt.
    """
    # System role first, then the running conversation, then the agent's
    # scratchpad placeholder.
    message_layout = [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="messages"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
    worker_prompt = ChatPromptTemplate.from_messages(message_layout)
    return AgentExecutor(
        agent=create_openai_tools_agent(llm, tools, worker_prompt),
        tools=tools,
    )

def agent_node(state, agent, name):
    """Run ``agent`` on the graph state and wrap its answer as a named message.

    Args:
        state: Current graph state, passed unchanged to ``agent.invoke``.
        agent: Object exposing ``invoke`` whose result has an ``"output"`` entry.
        name: Name attached to the produced message.

    Returns:
        A state update of the form ``{"messages": [HumanMessage(...)]}``.
    """
    agent_output = agent.invoke(state)["output"]
    reply = HumanMessage(content=agent_output, name=name)
    return {"messages": [reply]}

## Define Tools

In [17]:
# Tools handed to the worker agents.
# Tavily web search, limited to 3 results per query.
search_tool = TavilySearchResults(max_results=3)
# Google Places lookup tool (presumably reads the GPLACES_API_KEY set above -- verify).
places_tool = GooglePlacesTool()
# Convenience list of all tools defined in this cell.
tools = [search_tool, places_tool]

## Create Supervisor

In [18]:
# Workers the supervisor can dispatch to.
members = ["Researcher", "Map_Searcher"]  # "Image_Describer", "Geo_Guesser"

# The team supervisor is an LLM node: it only chooses which worker acts next,
# or FINISH once the work is completed.
options = ["FINISH", *members]

# OpenAI function schema that constrains the reply to {"next": <one of options>},
# which keeps output parsing simple.
function_def = {
    "name": "route",
    "description": "Select the next role.",
    "parameters": {
        "title": "routeSchema",
        "type": "object",
        "properties": {
            "next": {"title": "Next", "anyOf": [{"enum": options}]},
        },
        "required": ["next"],
    },
}

# System instructions, then the conversation so far, then the routing question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", supervisor_sys_prompt),
        MessagesPlaceholder(variable_name="messages"),
        supervisor_user_prompt,
    ]
).partial(members=", ".join(members), options=str(options))

llm = ChatOpenAI(model="gpt-4o")

# prompt -> model forced to call `route` -> parsed function arguments
routing_llm = llm.bind_functions(functions=[function_def], function_call="route")
supervisor_chain = prompt | routing_llm | JsonOutputFunctionsParser()

## Create Agents

In [19]:
#members = ["Image_Describer", "Researcher", "Map_Searcher", "Geo_Guesser"]

# # Image Describer (currently disabled)
# image_describer_agent = create_agent(llm, [], "You analyze streetview images to extract key information to help identify the location.")
# image_describer_node = functools.partial(agent_node, agent=image_describer_agent, name="Image_Describer")

# Researcher: worker that only has the web-search tool.
research_agent = create_agent(
    llm=llm,
    tools=[search_tool],
    system_prompt="You are a web researcher.",
)
research_node = functools.partial(agent_node, name="Researcher", agent=research_agent)

# Map_Searcher: worker that only has the places tool.
map_searcher_agent = create_agent(
    llm=llm,
    tools=[places_tool],
    system_prompt="You are a map searcher.",
)
map_searcher_node = functools.partial(agent_node, name="Map_Searcher", agent=map_searcher_agent)

# # Geo_Guesser (currently disabled)
# geo_guesser_agent = create_agent(llm, [], "You are a geo guesser.")
# geo_guesser_node = functools.partial(agent_node, agent=geo_guesser_agent, name="Geo_Guesser")

## Create Graph

In [20]:
# The agent state is the input to each node in the graph
class AgentState(TypedDict):
    """Shared state passed between the supervisor and the worker nodes."""
    # Conversation so far. The operator.add annotation tells the graph that
    # messages returned by a node are added to the current sequence rather
    # than replacing it.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Name of the node to route to next (or "FINISH"); populated by the supervisor.
    next: str

# Assemble the graph: one node per active worker, plus the supervisor.
workflow = StateGraph(AgentState)
#workflow.add_node("Image_Describer", image_describer_node)
#workflow.add_node("Geo_Guesser", geo_guesser_node)
for node_name, node_runnable in (
    ("Researcher", research_node),
    ("Map_Searcher", map_searcher_node),
    ("supervisor", supervisor_chain),
):
    workflow.add_node(node_name, node_runnable)

# Every worker hands control back to the supervisor once it has run.
for member in members:
    workflow.add_edge(member, "supervisor")

# Route on the "next" value written by the supervisor: a worker name maps to
# that worker's node, and FINISH maps to the end of the graph.
conditional_map = {**{name: name for name in members}, "FINISH": END}
workflow.add_conditional_edges("supervisor", lambda state: state["next"], conditional_map)

# The supervisor runs first; compile the definition into a runnable graph.
workflow.set_entry_point("supervisor")
graph = workflow.compile()

## Run

In [21]:
# Function to encode the image
def encode_image(image_path):
    """Read the file at ``image_path`` and return its contents as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Locate the four street-view captures for one sample.
# NOTE: `id` shadows the builtin of the same name; the name is kept so that any
# other cell reading it keeps working.
id = "103"
image_dir = f'./data/{id}/'
directions = ["north", "south", "east", "west"]

# Build one image part per direction. The files are PNGs, so the data URL must
# declare image/png for the MIME type to match the encoded payload.
image_inputs = []
for direction in directions:
    base64_img = encode_image(f"{image_dir}{direction}.png")
    image_inputs.append(
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_img}"}}
    )

# Define input: a single user message holding the text prompt followed by the images.
text_input = [{"type": "text", "text": text_prompt}]
inputs = {"messages": [HumanMessage(content=text_input + image_inputs)]}

# Run graph, printing every streamed step except one keyed "__end__".
for step in graph.stream(inputs):
    if "__end__" not in step:
        print(step)
        print("----")

{'supervisor': {'next': 'Researcher'}}
----
{'Researcher': {'messages': [HumanMessage(content='{"city": "Bourges, France", "latitude": "47.0842", "longitude": "2.3966"}', name='Researcher')]}}
----
{'supervisor': {'next': 'FINISH'}}
----
