# My-Agent Quality Analysis

In [3]:
import warnings

# Install a process-wide filter that ignores every FutureWarning raised after this cell runs
# (presumably to keep the outputs of the cells below free of library deprecation chatter).
warnings.simplefilter(action="ignore", category=FutureWarning)

In [17]:
import couchbase.auth
import couchbase.cluster
import couchbase.options
import datasets
import dotenv
import os

def _require_env(name: str) -> str:
    """Return the value of the environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty, so that a misconfigured
            environment is reported by variable name instead of surfacing later
            as an opaque connection/authentication error.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; define it in the shell or in a .env file."
        )
    return value


# Load variables from a .env file (when one is found) before reading them below.
dotenv.load_dotenv()

conn_opts = couchbase.options.ClusterOptions(
    authenticator=couchbase.auth.PasswordAuthenticator(
        _require_env("AGENT_CATALOG_USERNAME"), _require_env("AGENT_CATALOG_PASSWORD")
    )
)
cluster = couchbase.cluster.Cluster.connect(_require_env("AGENT_CATALOG_CONN_STRING"), conn_opts)

# We will treat the results of our tools as RAGAS context (we need to transpose our generations).
#
# What the query does:
#   1. Keeps the raw_logs rows whose `grouping` is not NULL and groups them by (grouping, session).
#   2. For each group:
#        - question = content of the earliest message of kind "human" (by timestamp),
#        - answer   = content of the latest message, falling back to its tool_calls,
#        - contexts = every other message of the group (content, falling back to tool_calls).
#   3. Returns three parallel arrays (question / answer / contexts), each ordered by the same
#      row_num, i.e. the column-oriented layout that datasets.Dataset.from_dict consumes.
#
# NOTE(review): both ROW_NUMBER() OVER () calls have no ORDER BY, so the numbering itself is
# arbitrary, and the `contexts` subquery has no ORDER BY either -- the order of the context
# entries is therefore not guaranteed to be chronological. Confirm whether that matters for
# the metrics computed downstream.
query = cluster.analytics_query("""
    WITH LatestGenerations AS (
        FROM
            `travel-sample`.agent_activity.raw_logs AS s
        WHERE 
            s.`grouping` IS NOT NULL
        GROUP BY 
            s.`grouping`,
            s.session
            GROUP AS g
        LETTING
            msgs = (
                FROM 
                    g AS gi
                SELECT
                    gi.s.content.content,
                    gi.s.content.tool_calls,
                    gi.s.kind,
                    gi.s.timestamp,
                    ROW_NUMBER() OVER () AS row_num
            ),
            first_human = (
                FROM 
                    msgs AS msgsi
                WHERE 
                    msgsi.kind = "human"
                SELECT
                    msgsi.content,
                    msgsi.row_num
                ORDER BY
                    STR_TO_MILLIS(msgsi.timestamp) ASC
                LIMIT 1
            )[0],
            last_message = (
                FROM
                    msgs AS msgsi
                SELECT 
                    COALESCE(msgsi.content, msgsi.tool_calls) AS content,
                    msgsi.row_num
                ORDER BY 
                    STR_TO_MILLIS(msgsi.timestamp) DESC
                LIMIT 1
            )[0],
            context = (
                FROM 
                    msgs AS msgsi
                WHERE
                    msgsi.row_num != first_human.row_num AND 
                    msgsi.row_num != last_message.row_num
                SELECT VALUE
                   COALESCE(msgsi.content, msgsi.tool_calls)
            )
        SELECT
            first_human.content AS question,
            last_message.content AS answer,
            context AS contexts,
            ROW_NUMBER() OVER () AS row_num
    )
    SELECT
        (FROM LatestGenerations e SELECT VALUE e.question ORDER BY e.row_num) AS question,
        (FROM LatestGenerations e SELECT VALUE e.answer ORDER BY e.row_num) AS answer,
        (FROM LatestGenerations e SELECT VALUE e.contexts ORDER BY e.row_num) AS contexts
""")
# The outer SELECT has no FROM clause, so a single result object holding the three arrays is
# expected -- TODO confirm; only the first row is used.
ragas_dataset = datasets.Dataset.from_dict(list(query)[0])
ragas_dataset

Dataset({
    features: ['answer', 'contexts', 'question'],
    num_rows: 37
})

In [24]:
import langchain_openai
import ragas.llms
import ragas.metrics

# Judge model: GPT-4o, wrapped so that RAGAS can call it through its LangChain adapter.
chat_model = langchain_openai.ChatOpenAI(model="gpt-4o")
evaluator_llm = ragas.llms.LangchainLLMWrapper(chat_model)

# Both metrics are scored by the same evaluator LLM.
quality_metrics = [
    ragas.metrics.AnswerRelevancy(llm=evaluator_llm),
    ragas.metrics.ContextUtilization(llm=evaluator_llm),
]
score = ragas.evaluate(ragas_dataset, metrics=quality_metrics)

# Display the evaluation result as a pandas DataFrame.
score.to_pandas()

Evaluating: 100%|██████████| 74/74 [00:32<00:00,  2.30it/s]


Unnamed: 0,user_input,retrieved_contexts,response,answer_relevancy,context_utilization
0,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,talk_to_user({'message': 'Please provide the n...,0.847936,1.0
1,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,talk_to_user({'message': 'Please provide the s...,0.844437,1.0
2,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,talk_to_user({'message': 'Please provide the s...,0.837004,1.0
3,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,talk_to_user({'message': 'Please provide the s...,0.869787,1.0
4,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,find_direct_routes_between_airports({'argument...,0.815323,1.0
5,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,find_direct_routes_between_airports({'argument...,0.793692,1.0
6,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,mark_task_69c6c897_successful({'source_airport...,0.792364,0.833333
7,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,find_routes_with_one_layover({'argument_input'...,0.814374,1.0
8,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,mark_task_bd89f75d_successful({'source_airport...,0.786626,0.833333
9,(The following message is from the orchestrato...,[# Agent\n\nI am a workflow orchestrator. You ...,mark_task_d5ebe28e_successful({'task_result': 1}),0.790309,1.0


# Agent Quality around Walks

In [22]:
# Routing check: we want to confirm that the agent is sent to the right tasks,
# which we do by inspecting the agent's walks.
# Fetch every row returned by the Walks() function in `travel-sample`.agent_activity.
walks_statement = """
    FROM
        `travel-sample`.agent_activity.Walks() t
    SELECT
        t;
"""
query = cluster.analytics_query(walks_statement)
list(query)

[{'msgs': [{'content': {'extra': {'status': 'FAILED'},
     'from_node': 'find_source_and_dest'},
    'kind': 'transition',
    'msg_num': 13,
    'timestamp': '2024-11-20T13:16:41.778069-08:00'}],
  'sid': 'e73eb8777f194cecaef9954864f1d7b4',
  'vid': {'identifier': '582e23b276386435c59289c13d22df58873631bf',
   'timestamp': '2024-11-20T18:51:38.490768Z'}},
 {'msgs': [{'content': {'extra': {'status': 'RUNNING'},
     'to_node': 'find_source_and_dest'},
    'kind': 'transition',
    'msg_num': 1,
    'timestamp': '2024-11-20T13:03:27.041697-08:00'},
   {'content': {'extra': {'status': 'SUCCESSFUL'},
     'from_node': 'find_source_and_dest'},
    'kind': 'transition',
    'msg_num': 10,
    'timestamp': '2024-11-20T13:04:00.754507-08:00'},
   {'content': {'extra': {'status': 'RUNNING'},
     'to_node': 'find_travel_routes'},
    'kind': 'transition',
    'msg_num': 11,
    'timestamp': '2024-11-20T13:04:01.318589-08:00'},
   {'content': {'extra': {'status': 'SUCCESSFUL'},
     'from_node

# Agent Quality around Tool Selection

In [23]:
# Tool-selection check: we want to assert that the agent picks the right tools
# for the right tasks. ToolCalls() is provided to make this inspection easier.
# Fetch every row returned by the ToolCalls() function in `travel-sample`.agent_activity.
tool_calls_statement = """
    FROM
        `travel-sample`.agent_activity.ToolCalls() t
    SELECT
        t;
"""
query = cluster.analytics_query(tool_calls_statement)
list(query)

[{'t': {'sid': 'e73eb8777f194cecaef9954864f1d7b4',
   'tool_calls': [{'tool_args': {'message': 'Please provide the source airport and the destination airport so I can return their IATA codes for you.'},
     'tool_call_id': 'call_nCkJ33tZQH8OgxEbQ3JNLa3Y',
     'tool_name': 'talk_to_user',
     'tool_result': "i want to go to Mars, I'm from Saturn",
     'tool_status': 'success'},
    {'tool_args': {'message': 'Please provide the source airport and the destination airport so I can return their IATA codes for you.'},
     'tool_call_id': 'call_nCkJ33tZQH8OgxEbQ3JNLa3Y',
     'tool_name': 'talk_to_user',
     'tool_result': "i want to go to Mars, I'm from Saturn",
     'tool_status': 'success'},
    {'tool_args': {'message': "It seems like you're mentioning planets! For this task, I need the names of actual airports on Earth. Could you please provide the name of the source airport and the destination airport?"},
     'tool_call_id': 'call_JJHaneHQs3JFqVobBZZ1x3TI',
     'tool_name': 'tal