# Agent Quality with RAGAS

In [None]:
import warnings

# Keep notebook output readable: silence FutureWarning for the whole session.
warnings.simplefilter("ignore", FutureWarning)

In [None]:
import couchbase.auth
import couchbase.cluster
import couchbase.options
import datasets
import dotenv
import os

# Load connection settings from a .env file (if one exists) into the process environment.
dotenv.load_dotenv()

# Fail fast with an actionable message. os.getenv() returns None for an unset variable, and
# without this check that None would be handed straight to the Couchbase SDK.
_required_settings = (
    "AGENT_CATALOG_CONN_STRING",
    "AGENT_CATALOG_USERNAME",
    "AGENT_CATALOG_PASSWORD",
)
_missing_settings = [name for name in _required_settings if os.getenv(name) is None]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Set them in your environment or in a .env file."
    )

# Authenticate with username/password and open the cluster connection used by every query below.
conn_opts = couchbase.options.ClusterOptions(
    authenticator=couchbase.auth.PasswordAuthenticator(
        os.getenv("AGENT_CATALOG_USERNAME"), os.getenv("AGENT_CATALOG_PASSWORD")
    )
)
cluster = couchbase.cluster.Cluster.connect(os.getenv("AGENT_CATALOG_CONN_STRING"), conn_opts)

# Build a RAGAS dataset from the raw agent logs.
#
# For every (grouping, session) pair in raw_logs the query picks:
#   * question -> the earliest "human" message (by timestamp),
#   * answer   -> the latest message (by timestamp), falling back to its tool calls,
#   * contexts -> every other message in the group (we treat the intermediate messages and
#                 tool results as RAGAS context).
# The outer SELECT then transposes the per-group rows into a single column-oriented record
# ({"question": [...], "answer": [...], "contexts": [...]}), which is what
# datasets.Dataset.from_dict consumes.
#
# The context sub-query is ordered by timestamp: without an ORDER BY the order of each
# contexts list is unspecified, and rank-sensitive metrics would not be reproducible
# between runs (up to timestamp ties).
query = cluster.analytics_query("""
    WITH LatestGenerations AS (
        FROM
            `travel-sample`.agent_activity.raw_logs AS s
        WHERE 
            s.`grouping` IS NOT NULL
        GROUP BY 
            s.`grouping`,
            s.session
            GROUP AS g
        LETTING
            msgs = (
                FROM 
                    g AS gi
                SELECT
                    gi.s.content.content,
                    gi.s.content.tool_calls,
                    gi.s.kind,
                    gi.s.timestamp,
                    ROW_NUMBER() OVER () AS row_num
            ),
            first_human = (
                FROM 
                    msgs AS msgsi
                WHERE 
                    msgsi.kind = "human"
                SELECT
                    msgsi.content,
                    msgsi.row_num
                ORDER BY
                    msgsi.timestamp ASC
                LIMIT 1
            )[0],
            last_message = (
                FROM
                    msgs AS msgsi
                SELECT 
                    COALESCE(msgsi.content, msgsi.tool_calls) AS content,
                    msgsi.row_num
                ORDER BY 
                    msgsi.timestamp DESC
                LIMIT 1
            )[0],
            context = (
                FROM 
                    msgs AS msgsi
                WHERE
                    msgsi.row_num != first_human.row_num AND 
                    msgsi.row_num != last_message.row_num
                SELECT VALUE
                   COALESCE(msgsi.content, msgsi.tool_calls)
                ORDER BY
                    msgsi.timestamp ASC
            )
        SELECT
            first_human.content AS question,
            last_message.content AS answer,
            context AS contexts,
            ROW_NUMBER() OVER () AS row_num
    )
    SELECT
        (FROM LatestGenerations e SELECT VALUE e.question ORDER BY e.row_num) AS question,
        (FROM LatestGenerations e SELECT VALUE e.answer ORDER BY e.row_num) AS answer,
        (FROM LatestGenerations e SELECT VALUE e.contexts ORDER BY e.row_num) AS contexts
""")
# The outer SELECT has no FROM clause, so the first (only) row carries all three columns.
ragas_dataset = datasets.Dataset.from_dict(list(query)[0])
ragas_dataset

In [None]:
import langchain_openai
import ragas.llms
import ragas.metrics

# GPT-4o is the judge model; RAGAS consumes it through its LangChain wrapper.
chat_model = langchain_openai.ChatOpenAI(model="gpt-4o")
evaluator_llm = ragas.llms.LangchainLLMWrapper(chat_model)

# Both metrics are scored by the same evaluator LLM.
answer_relevancy = ragas.metrics.AnswerRelevancy(llm=evaluator_llm)
context_utilization = ragas.metrics.ContextUtilization(llm=evaluator_llm)

score = ragas.evaluate(ragas_dataset, metrics=[answer_relevancy, context_utilization])
# Show the per-sample scores as a DataFrame.
score.to_pandas()

# Agent Quality around Walks

In [None]:
# Routing check: to confirm the agent is routed to the right tasks, fetch every
# record produced by Walks() and inspect the agent's walks.
walks_statement = """
    FROM
        `travel-sample`.agent_activity.Walks() t
    SELECT
        t;
"""
query = cluster.analytics_query(walks_statement)
list(query)

# Agent Quality around Tool Selection

In [None]:
# Tool-selection check: to assert the agent picks the right tools for the right
# tasks, fetch every record produced by the provided ToolCalls() view.
tool_calls_statement = """
    FROM
        `travel-sample`.agent_activity.ToolCalls() t
    SELECT
        t;
"""
query = cluster.analytics_query(tool_calls_statement)
list(query)