# Agent-based Evaluation
Chatbot Evaluation as Multi-agent Simulation

## Define Chatbot

In [1]:
from typing import List

import ollama


# This part is flexible: define your agent inline here, or call your agent's API instead.
def my_chat_bot(messages: List[dict]) -> str:
    """Return the airline support chatbot's reply to a conversation.

    A fixed system message is placed in front of ``messages`` and the
    combined list is sent to ``ollama.chat`` with the "Llama3.2-Korean"
    model.

    Args:
        messages: The conversation so far, as chat message dicts such as
            ``{"role": "user", "content": "hi!"}``. The caller's list is
            not modified.

    Returns:
        The text content of the model's reply (a plain string, not the
        full message dict).
    """
    system_message = {
        "role": "system",
        "content": "You are a customer support agent for an airline.",
    }
    # Build a new list rather than rebinding the parameter, so the
    # caller's conversation history is left untouched.
    completion = ollama.chat(
        messages=[system_message, *messages],
        model="Llama3.2-Korean",
    )
    # Only the reply text is returned; the rest of the response is dropped.
    return completion["message"]["content"]

In [2]:
# Smoke-test the chatbot with a single user greeting; as the cell's last
# expression, the returned reply is shown as the cell output.
my_chat_bot([{"role": "user", "content": "hi!"}])

'Hello! How can I assist you today? Are you looking to book a flight, have questions about your existing ticket, or need help with something else?'

## Define Simulated User

In [4]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_ollama import ChatOllama

# Scenario the simulated customer plays out; substituted into the
# {instructions} slot of the system prompt below.
instructions = """Your name is Harrison. You are trying to get a refund for the trip you took to Alaska. \
You want them to give you ALL the money back. \
This trip happened 5 years ago."""

# System prompt for the simulated customer. The model is told to answer with
# the single word 'FINISHED' once it considers the conversation over.
system_prompt_template = """You are a customer of an airline company. \
You are interacting with a user who is a customer support person. \

{instructions}

When you are finished with the conversation, respond with a single word 'FINISHED'"""

# System prompt followed by the running conversation (supplied at invoke time
# under the "messages" key), with the scenario pre-filled via partial().
# NOTE(review): the template text above has no {name} placeholder, so the
# `name` partial looks unused here — confirm before removing it.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt_template),
        MessagesPlaceholder(variable_name="messages"),
    ]
).partial(name="Harrison", instructions=instructions)

model = ChatOllama(model="Llama3.2-Korean")

# Piping the prompt into the model gives a runnable that takes
# {"messages": [...]} and returns the simulated customer's next message.
simulated_user = prompt | model

In [5]:
from langchain_core.messages import HumanMessage

# Seed the conversation with the support agent's opening line. It is passed as
# a HumanMessage: the system prompt tells the model that the "user" it talks
# to is the customer support person.
messages = [HumanMessage(content="Hi! How can I help you?")]
# Ask the simulated customer for its reply; as the cell's last expression,
# the returned message is displayed as the cell output.
simulated_user.invoke({"messages": messages})

AIMessage(content="I'm Harrison and I booked a flight to Alaska five years ago. Unfortunately, the trip was cancelled due to unforeseen circumstances, and I'd like to request a full refund for my entire ticket price. Can we discuss this further?", additional_kwargs={}, response_metadata={'model': 'Llama3.2-Korean', 'created_at': '2024-12-09T06:58:52.583297Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2215196000, 'load_duration': 31397458, 'prompt_eval_count': 112, 'prompt_eval_duration': 806000000, 'eval_count': 48, 'eval_duration': 1372000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-68fdace5-bba4-4e59-9c12-f94e020cd0ff-0', usage_metadata={'input_tokens': 112, 'output_tokens': 48, 'total_tokens': 160})