# Imports

In [1]:
# ruff: noqa
from pydantic import BaseModel, Field
from langsmith import Client
import os

from langsmith import traceable

from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode

from langchain_core.messages import convert_to_openai_messages

from jinja2 import Template
from typing import Dict, Any, Annotated, List
from operator import add
from openai import OpenAI


import instructor

from utils.utils import get_tool_descriptions, format_ai_message
from utils.tools import (
    get_formatted_context,
    get_formatted_reviews_context,
    get_shopping_cart,
    remove_from_cart,
    add_to_shopping_cart,
)

from langgraph.checkpoint.postgres import PostgresSaver

  from .autonotebook import tqdm as notebook_tqdm


# Create Coordinator Dataset

In [2]:
class Delegation(BaseModel):
    agent: str
    task: str


class CoordinatorAgentResponse(BaseModel):
    next_agent: str
    plan: List[Delegation]
    answer: str
    final_answer: bool

In [3]:
from langchain_core.messages import AIMessage


@traceable(
    name="coordinator_agent",
    run_type="llm",
    metadata={"ls_provider": "openai", "ls_model_name": "gpt-4.1"},
)
def coordinator_agent(state: "State"):
    prompt_template = """You are a Coordinator Agent as part of a shopping assistant.

Your role is to create plans for solving user queries and delegate the tasks accordingly.
You will be given a conversation history, your task is to create a plan for solving the user's query.
After the plan is created, you should output the next agent to invoke and the task to be performed by that agent.
Once an agent finishes its task, you will be handed the control back, you should then review the conversation history and revise the plan.
If there is a sequence of tasks to be performed by a single agent, you should combine them into a single task.

The possible agents are:

- product_qa_agent: The user is asking a question about a product. This can be a question about available products, their specifications, user reviews etc.
- shopping_cart_agent: The user is asking to add or remove items from the shopping cart or questions about the current shopping cart.

CRITICAL RULES:
- If next_agent is "", final_answer MUST be false
(You cannot delegate the task to an agent and return to the user in the same response)
- If final_answer is true, next_agent MUST be ""
(You must wait for agent results before returning to user)
- If you need to call other agents before answering, set:
next_agent="...", final_answer=false
- After receiving agent results, you can then set:
next_agent="", final_answer=true
- One of the following has to be true:
next_agent is "" and final_answer is true
next_agent is not "" and final_answer is false

Additional instructions:

- Do not route to any agent if the user's query needs clarification. Do it yourself.
- Write the plan to the plan field.
- Write the next agent to invoke to the next_agent field.
- Once you have all the information needed to answer the user's query, you should set the final_answer field to True and output the answer to the user's query.
- The final answer to the user query should be a comprehensive answer that explains the actions that were performed to answer the query.
- Never set final_answer to true if the plan is not complete.
- You should output the next_agent field as well as the plan field.
"""

    template = Template(prompt_template)

    prompt = template.render()

    messages = state.messages

    conversation = []

    for message in messages:
        conversation.append(convert_to_openai_messages(message))

    client = instructor.from_openai(OpenAI())

    response, raw_response = client.chat.completions.create_with_completion(
        model="gpt-4.1",
        response_model=CoordinatorAgentResponse,
        messages=[{"role": "system", "content": prompt}, *conversation],
        temperature=0.0,
    )

    if response.final_answer:
        ai_message = []
    else:
        ai_message = [AIMessage(content=response.answer)]

    return {
        "messages": ai_message,
        "answer": response.answer,
        "coordinator_agent": {
            "iteration": state.coordinator_agent.iteration + 1,
            "final_answer": response.final_answer,
            "next_agent": response.next_agent,
            "plan": [data.model_dump() for data in response.plan],
        },
    }

In [4]:
class ToolCall(BaseModel):
    name: str
    arguments: dict


class RAGUsedContext(BaseModel):
    id: str = Field(description="ID of the item used to answer the question")
    description: str = Field(description="Description of the item used to answer")


class AgentProperties(BaseModel):
    iteration: int = 0
    final_answer: bool = False
    available_tools: List[Dict[str, Any]] = []
    tool_calls: List[ToolCall] = []


class CoordinatorAgentProperties(BaseModel):
    iteration: int = 0
    final_answer: bool = False
    plan: List[Delegation] = []
    next_agent: str = ""


class State(BaseModel):
    messages: Annotated[List[Any], add] = []
    user_intent: str = ""
    product_qa_agent: AgentProperties = Field(default_factory=AgentProperties)
    shopping_cart_agent: AgentProperties = Field(default_factory=AgentProperties)
    coordinator_agent: CoordinatorAgentProperties = Field(
        default_factory=CoordinatorAgentProperties
    )
    answer: str = ""
    references: Annotated[List[RAGUsedContext], add] = []
    user_id: str = ""
    cart_id: str = ""

In [5]:
initial_state = State(
    messages=[{"role": "user", "content": "How is the weather today?"}]
)

In [6]:
coordinator_agent(initial_state)

{'messages': [],
 'answer': "I'm here to assist with shopping-related queries, such as finding products, answering questions about items, or helping with your shopping cart. If you need information about the weather, I recommend checking a weather app or website. How can I assist you with your shopping today?",
 'coordinator_agent': {'iteration': 1,
  'final_answer': True,
  'next_agent': '',
  'plan': []}}

In [7]:
coordinator_eval_dataset = [
    {
        "inputs": {
            "messages": [{"role": "user", "content": "What is the weather today?"}]
        },
        "outputs": {"next_agent": "", "coordinator_final_answer": True},
    },
    {
        "inputs": {
            "messages": [{"role": "user", "content": "Can I get some earphones?"}]
        },
        "outputs": {
            "next_agent": "product_qa_agent",
            "coordinator_final_answer": False,
        },
    },
    {
        "inputs": {
            "messages": [
                {
                    "role": "user",
                    "content": "Can you add an item with ID B09NLTDHQ6 to my cart?",
                }
            ]
        },
        "outputs": {
            "next_agent": "shopping_cart_agent",
            "coordinator_final_answer": False,
        },
    },
    {
        "inputs": {
            "messages": [
                {"role": "user", "content": "Can you add those earphones to my cart?"}
            ]
        },
        "outputs": {"next_agent": "", "coordinator_final_answer": True},
    },
    {
        "inputs": {
            "messages": [
                {
                    "role": "user",
                    "content": "Can you add the best items to my cart? I am looking for laptop bags.",
                }
            ]
        },
        "outputs": {
            "next_agent": "product_qa_agent",
            "coordinator_final_answer": False,
        },
    },
    {
        "inputs": {
            "messages": [
                {
                    "role": "user",
                    "content": "Can you find some good reviews for items in my cart?",
                }
            ]
        },
        "outputs": {
            "next_agent": "shopping_cart_agent",
            "coordinator_final_answer": False,
        },
    },
    {
        "inputs": {
            "messages": [
                {
                    "role": "user",
                    "content": "Can you put the items with the most positive user reviews to my cart?",
                }
            ]
        },
        "outputs": {
            "next_agent": "product_qa_agent",
            "coordinator_final_answer": False,
        },
    },
    {
        "inputs": {
            "messages": [{"role": "user", "content": "What kind of stuff do you sell?"}]
        },
        "outputs": {"next_agent": "", "coordinator_final_answer": True},
    },
    {
        "inputs": {
            "messages": [{"role": "user", "content": "Can you help me with my order?"}]
        },
        "outputs": {"next_agent": "", "coordinator_final_answer": True},
    },
    {
        "inputs": {
            "messages": [
                {
                    "role": "user",
                    "content": "Can you add two, ideally red tablets to my cart?",
                }
            ]
        },
        "outputs": {
            "next_agent": "product_qa_agent",
            "coordinator_final_answer": False,
        },
    },
]

# Upload dataset to Langsmith

In [13]:
client = Client(api_key=os.environ["LANGSMITH_API_KEY"])
dataset_name = "coordinator-eval-dataset"

# Run Evaluator

In [9]:
def next_agent_evaluator(run, example):
    print(run.outputs)
    next_agent_match = (
        run.outputs["coordinator_agent"]["next_agent"] == example.outputs["next_agent"]
    )
    final_answer_match = (
        run.outputs["coordinator_agent"]["final_answer"]
        == example.outputs["coordinator_final_answer"]
    )
    return next_agent_match and final_answer_match

In [10]:
results = client.evaluate(
    lambda x: coordinator_agent(State(messages=x["messages"])),
    data=dataset_name,
    evaluators=[next_agent_evaluator],
    experiment_prefix="coordinator-eval-dataset",
)

View the evaluation results for experiment: 'coordinator-eval-dataset-610d88f5' at:
https://smith.langchain.com/o/8e3d1487-3f47-4c1c-9cb0-18779336b64c/datasets/2bcf0b0c-d379-4b82-8a45-483b4b5ac03a/compare?selectedSessions=58038809-bc1d-4a33-8f1e-2730e78a25c4




1it [00:02,  2.08s/it]

{'messages': [AIMessage(content='I will first look for two red tablets that are available for purchase. Once I find suitable options, I will proceed to add them to your cart.', additional_kwargs={}, response_metadata={})], 'answer': 'I will first look for two red tablets that are available for purchase. Once I find suitable options, I will proceed to add them to your cart.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'product_qa_agent', 'plan': [{'agent': 'product_qa_agent', 'task': 'Find two red tablets available for purchase.'}]}}


2it [00:04,  2.19s/it]

{'messages': [], 'answer': 'Could you please clarify what specific help you need with your order? For example, are you looking to track your order, modify it, cancel it, or have questions about the items in your order?', 'coordinator_agent': {'iteration': 1, 'final_answer': True, 'next_agent': '', 'plan': []}}


3it [00:07,  2.60s/it]

{'messages': [AIMessage(content='I will retrieve information about the types of products available for sale.', additional_kwargs={}, response_metadata={})], 'answer': 'I will retrieve information about the types of products available for sale.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'product_qa_agent', 'plan': [{'agent': 'product_qa_agent', 'task': 'Provide an overview of the types of products available for sale.'}]}}


4it [00:09,  2.22s/it]

{'messages': [AIMessage(content='I will first identify which items have the most positive user reviews. Once I have this information, I will proceed to add those items to your cart.', additional_kwargs={}, response_metadata={})], 'answer': 'I will first identify which items have the most positive user reviews. Once I have this information, I will proceed to add those items to your cart.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'product_qa_agent', 'plan': [{'agent': 'product_qa_agent', 'task': 'Identify the items with the most positive user reviews.'}]}}


5it [00:10,  1.99s/it]

{'messages': [AIMessage(content='First, I will retrieve the list of items in your shopping cart. Then, I will find good reviews for those items.', additional_kwargs={}, response_metadata={})], 'answer': 'First, I will retrieve the list of items in your shopping cart. Then, I will find good reviews for those items.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'shopping_cart_agent', 'plan': [{'agent': 'shopping_cart_agent', 'task': "Retrieve the list of items currently in the user's shopping cart."}]}}


6it [00:13,  2.20s/it]

{'messages': [AIMessage(content='I will first find the best laptop bags available based on user reviews, ratings, and popularity. Once I have this information, I will proceed to add the top options to your cart.', additional_kwargs={}, response_metadata={})], 'answer': 'I will first find the best laptop bags available based on user reviews, ratings, and popularity. Once I have this information, I will proceed to add the top options to your cart.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'product_qa_agent', 'plan': [{'agent': 'product_qa_agent', 'task': 'Find the best laptop bags available based on user reviews, ratings, and popularity.'}]}}


7it [00:15,  2.34s/it]

{'messages': [AIMessage(content='I need to identify which earphones the user is referring to before I can add them to your cart. I will check the previous context or ask for clarification if needed.', additional_kwargs={}, response_metadata={})], 'answer': 'I need to identify which earphones the user is referring to before I can add them to your cart. I will check the previous context or ask for clarification if needed.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'product_qa_agent', 'plan': [{'agent': 'product_qa_agent', 'task': 'Identify which earphones the user is referring to, based on previous context or by asking for clarification if unclear.'}]}}


8it [00:18,  2.30s/it]

{'messages': [AIMessage(content='I will add the item with ID B09NLTDHQ6 to your shopping cart.', additional_kwargs={}, response_metadata={})], 'answer': 'I will add the item with ID B09NLTDHQ6 to your shopping cart.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'shopping_cart_agent', 'plan': [{'agent': 'shopping_cart_agent', 'task': "Add the item with ID B09NLTDHQ6 to the user's shopping cart."}]}}


9it [00:20,  2.34s/it]

{'messages': [AIMessage(content='I will look up available earphones and provide you with some options to choose from.', additional_kwargs={}, response_metadata={})], 'answer': 'I will look up available earphones and provide you with some options to choose from.', 'coordinator_agent': {'iteration': 1, 'final_answer': False, 'next_agent': 'product_qa_agent', 'plan': [{'agent': 'product_qa_agent', 'task': 'Find available earphones for the user, including popular options and specifications.'}]}}


10it [00:22,  2.38s/it]

{'messages': [], 'answer': "I'm sorry, but I can only assist with shopping-related queries such as finding products, answering questions about products, or helping with your shopping cart. If you have a shopping question, please let me know!", 'coordinator_agent': {'iteration': 1, 'final_answer': True, 'next_agent': '', 'plan': []}}


10it [00:24,  2.46s/it]


In [11]:
results

Unnamed: 0,inputs.messages,outputs.messages,outputs.answer,outputs.coordinator_agent,error,reference.next_agent,reference.coordinator_final_answer,feedback.next_agent_evaluator,execution_time,example_id,id
0,"[{'role': 'user', 'content': 'Can you add two,...",[content='I will first look for two red tablet...,I will first look for two red tablets that are...,"{'iteration': 1, 'final_answer': False, 'next_...",,product_qa_agent,False,True,2.070718,16245aef-e2dd-41bc-8428-b8fea1f791f2,5359b8e5-1469-4b2d-a3bc-200c0f4a8d2e
1,"[{'role': 'user', 'content': 'Can you help me ...",[],Could you please clarify what specific help yo...,"{'iteration': 1, 'final_answer': True, 'next_a...",,,True,True,2.272181,1f3e82fc-ac37-497d-a815-1cb377f723ee,67969998-1a6f-4f02-a205-fe41cd1607ef
2,"[{'role': 'user', 'content': 'What kind of stu...",[content='I will retrieve information about th...,I will retrieve information about the types of...,"{'iteration': 1, 'final_answer': False, 'next_...",,,True,False,3.073772,7fde8f47-24e8-44ac-a430-ed13f5cf1975,456dd1a4-6289-4c58-aa69-7b2496dbfb03
3,"[{'role': 'user', 'content': 'Can you put the ...",[content='I will first identify which items ha...,I will first identify which items have the mos...,"{'iteration': 1, 'final_answer': False, 'next_...",,product_qa_agent,False,True,1.636405,aed4c20b-3076-4216-815e-c38af524472d,7f1e610b-25e0-47ff-9edc-db0a1535dfc4
4,"[{'role': 'user', 'content': 'Can you find som...","[content='First, I will retrieve the list of i...","First, I will retrieve the list of items in yo...","{'iteration': 1, 'final_answer': False, 'next_...",,shopping_cart_agent,False,True,1.571121,0b4a9492-712f-4e47-9735-a1780426fa22,f6d64b69-a03a-4483-acd7-c60cc60673df
5,"[{'role': 'user', 'content': 'Can you add the ...",[content='I will first find the best laptop ba...,I will first find the best laptop bags availab...,"{'iteration': 1, 'final_answer': False, 'next_...",,product_qa_agent,False,True,2.617034,b51537d1-b5d6-4610-8566-3a1163a2c369,a78b28b1-cc4a-4234-9e77-82b1af51e584
6,"[{'role': 'user', 'content': 'Can you add thos...",[content='I need to identify which earphones t...,I need to identify which earphones the user is...,"{'iteration': 1, 'final_answer': False, 'next_...",,,True,False,2.609219,7138d60c-9445-4972-aaf1-c3ac97182bd4,f63fdb7d-cf6a-42f1-8b07-4bb044d7f5ee
7,"[{'role': 'user', 'content': 'Can you add an i...",[content='I will add the item with ID B09NLTDH...,I will add the item with ID B09NLTDHQ6 to your...,"{'iteration': 1, 'final_answer': False, 'next_...",,shopping_cart_agent,False,True,2.216266,ac0d1c9e-b79a-4668-9777-7bda6077ca96,0b3fe2f6-6be1-4985-8937-b6cfb3844656
8,"[{'role': 'user', 'content': 'Can I get some e...",[content='I will look up available earphones a...,I will look up available earphones and provide...,"{'iteration': 1, 'final_answer': False, 'next_...",,product_qa_agent,False,True,2.434769,7d7aab00-9b11-4915-ab3f-fa8f1982fcae,29016864-00ff-4428-a104-46272c22fdcc
9,"[{'role': 'user', 'content': 'What is the weat...",[],"I'm sorry, but I can only assist with shopping...","{'iteration': 1, 'final_answer': True, 'next_a...",,,True,True,2.459628,6f4a483e-88ff-4001-9d2b-537a8c0fbbf1,69cd8179-0ed1-45f6-bde5-d9ef69cb0a6c


# Extract Evaluation Results

In [14]:
results.experiment_name

'coordinator-eval-dataset-610d88f5'

In [15]:
result_resp = client.read_project(
    project_name=results.experiment_name,
    include_stats=True,
)

In [16]:
result_resp

TracerSessionResult(id=UUID('58038809-bc1d-4a33-8f1e-2730e78a25c4'), start_time=datetime.datetime(2025, 11, 24, 21, 6, 21, 240704, tzinfo=datetime.timezone.utc), end_time=None, description=None, name='coordinator-eval-dataset-610d88f5', extra={'metadata': {'git': {'tags': None, 'dirty': True, 'branch': 'main', 'commit': '98d3aa72431288df0595709444aa4406c31b8ae8', 'repo_name': '00-ai-engineering-bootcamp-cohort-2', 'remote_url': 'git@github.com:eliekawerk/00-ai-engineering-bootcamp-cohort-2.git', 'author_name': 'eliekawerk', 'commit_time': '1763923228', 'author_email': 'eliekawerk@hotmail.com'}, 'revision_id': '98d3aa7-dirty', 'dataset_splits': ['base'], 'dataset_version': '2025-11-16T13:15:45.900365+00:00', 'num_repetitions': 1}}, tenant_id=UUID('8e3d1487-3f47-4c1c-9cb0-18779336b64c'), reference_dataset_id=UUID('2bcf0b0c-d379-4b82-8a45-483b4b5ac03a'), run_count=10, latency_p50=datetime.timedelta(seconds=2, microseconds=353000), latency_p99=datetime.timedelta(seconds=3, microseconds=328

In [17]:
result_resp.feedback_stats

{'next_agent_evaluator': {'n': 10,
  'avg': 0.8,
  'stdev': 0.39999999999999997,
  'errors': 0,
  'values': {},
  'type': 'primary',
  'contains_thread_feedback': False}}

In [18]:
result_resp.feedback_stats.get("next_agent_evaluator")

{'n': 10,
 'avg': 0.8,
 'stdev': 0.39999999999999997,
 'errors': 0,
 'values': {},
 'type': 'primary',
 'contains_thread_feedback': False}

In [19]:
result_resp.feedback_stats.get("next_agent_evaluator").get("avg")

0.8

In [None]:
result_resp.feedback_stats.get("next_agent_evaluator").get("errors")