In [1]:
OPENAI_API_KEY=''

In [2]:
from openai import OpenAI
client = OpenAI(api_key=OPENAI_API_KEY)

In [3]:
MAX_TOKEN = 6000
MAX_CHAT = 10
SCENARIO_ID = 1
PERSONA = 'rebellious_people'
AGENT_NAME = 'sparky'

In [4]:
from enum import Enum
from pydantic import BaseModel

class UserAction(str, Enum):
    say = "say"
    leave = "leave"

class UserResponse(BaseModel):
    action: UserAction
    answer: str

In [5]:
import json
class Person:
    def __init__(self, scenarioID: int, persona: str):

        scenarioFile = open(f'./user/{AGENT_NAME}/scenario{scenarioID}.txt', 'r')
        scenario = scenarioFile.read()
        scenarioFile.close()

        personaFile = open(f'./persona/{persona}.txt', 'r')
        persona = personaFile.read()
        personaFile.close()
        example = '''
                    Example1:
                    Input: Are you looking for relaxation techniques or some fun trivia games to de-stress? Let me help you find the best fit!
                    Output: {action: say, answer: 'I am looking for relaxation techniques'}
                    Example2:
                    Input: There's a wonderful bear named Bruno who specializes in relaxation techniques. Would you like to meet him for some calming mindfulness tips?
                    Output: {action: leave, answer: 'Yes, meeting Bruno sounds lovely! I would love to get some calming mindfulness tips from him.'}
                  '''
        systemPrompt = f'You are a user talking to AI APP which can help you deal with your problem during break time. \
                            This is your persona: {persona}\
                            Please play the role according to the scenario: {scenario}\
                            Use Action → Answer structure for responses.\
                            Available Actions:\
                            1. say: respond base on persona and scenario\
                            2. leave: leave the chat when you think the conversation is over, no need to continue\
                            Examples:\n{example}'
        # print(systemPrompt)

        self.messages = [
            {'role': 'system', 'content': systemPrompt}, 
        ]
        self.leaveChat = False
    
    def say(self):
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=self.messages,
            response_format={
                'type': 'json_schema',
                'json_schema': 
                    {
                        "name":"whocares", 
                        "schema": UserResponse.model_json_schema()
                    }
            }
        )

        message = response.choices[0].message.content
        self.messages.append({'role': 'assistant', 'content': message})

        # str to dict
        message = json.loads(message)
        self.leaveChat = (message['action'] == 'leave')

        info = {
            'token': response.usage.total_tokens,
        }
        
        return message['answer'], info
    
    def listen(self, message: str):
        self.messages.append({'role': 'user', 'content': message})

        

In [6]:
test = Person(SCENARIO_ID, PERSONA)
ans = test.say()
print(ans)

('What does this app do?', {'token': 372})


In [None]:
class sparkyActionV0(str, Enum):
    call_bruno = "call_bruno"
    call_bizy = "call_bizy"
    ask_more = "ask_more"
    introduce_bruno = "introduce_bruno"
    introduce_bizy = "introduce_bizy"
    advise = "advise"

class sparkyActionV1(str, Enum):
    guide_to_bruno = "guide_to_bruno"
    guide_to_bizy = "guide_to_bizy"
    explore = "explore"
    introduce_bruno = "introduce_bruno"
    introduce_bizy = "introduce_bizy"

In [27]:
class bizyActionV0(str, Enum):
    greet = "greet"
    start_analysis = "start_analysis"
    analysing = "analysing"
    finish_analysis = "finish_analysis"
    ask_excuse = "ask_excuse"
    change_excuse = "change_excuse"
    advise = "advise"

In [28]:
class AgentResponse(BaseModel):
    action: bizyActionV0
    answer: str

In [29]:
import time
import json

class Animal:
    def __init__(self, name, version: str):

        self.agent_name = name
        agentFile = open(f'./agent/{name}_{version}.txt', 'r')
        agentPrompt = agentFile.read()
        agentFile.close()

        # create assistant and thread
        self.assistant = client.beta.assistants.create(
            name = self.agent_name,
            instructions = agentPrompt,
            model="gpt-4o-mini",
            response_format={
                'type': 'json_schema',
                'json_schema': 
                    {
                        "name":"whocares", 
                        "schema": AgentResponse.model_json_schema()
                    }
            }
        )
        self.thread = client.beta.threads.create()
        self.user_message = 'hello'
    
    def create_thread(self):
        self.thread = client.beta.threads.create()

    def say(self):

        prompt = client.beta.threads.messages.create(
            thread_id = self.thread.id,
            role = "user",
            content = self.user_message
        )

        run = client.beta.threads.runs.create_and_poll(
            thread_id=self.thread.id,
            assistant_id=self.assistant.id,
        )

        while True:
            runData = client.beta.threads.runs.retrieve(
                thread_id=self.thread.id,
                run_id=run.id
            )

            if runData.status == 'completed': 
                response = client.beta.threads.messages.list(
                    thread_id=self.thread.id
                )
                message = json.loads(response.data[0].content[0].text.value)

                info = {
                    'token': runData.usage.total_tokens,
                    'action': message['action']
                }
                return message['answer'], info

            else:
                print("runData.status")
                time.sleep(2) 


    def listen(self, message: str):
        self.user_message = message
    

In [26]:
test = Animal(name = AGENT_NAME, version='V0')
print(test.say())
test.listen('i feel tired')
print(test.say())

("Buzz! Hello there! I'm Bizy, your energetic forest friend ready to help you tackle procrastination and manage your time. How can I assist you today?", {'token': 847, 'action': 'introduce_bizy'})
("Buzz buzz, feeling tired can be tough! What's your current excuse for putting off your tasks? Let's dive in and see how we can shift that mindset!", {'token': 899, 'action': 'ask_excuse'})


In [10]:
class responseType(str, Enum):
    perfectly_match = "Perfectly Match"
    good_response = "Good Response"
    bad_response = "Bad Response"

In [11]:
from pydantic import BaseModel

class EvaluatorResponse(BaseModel):
    accuracy: int
    practicality: int

class OverallEvaluatorResponse(BaseModel):
    type: responseType
    reason: str

In [12]:
import json
class Evaluator():
    def __init__(self, agentName: str, scenarioID: int) -> None:

        systemPrompt = '''You are an evaluator. I will provide you with a user’s statement and an agent’s response.
                            You should evaluate the accuracy and practicality base on the scenario.
                            - Accuracy: Score from 0 to 10. This measures whether the model’s response appropriately addresses the user’s statement.
                            - Practicality: Score from 0 to 10. This evaluates whether the model’s suggestion is helpful to the user.
                        '''
        
        with open(f'./evaluator/{agentName}/scenario{scenarioID}.txt', 'r') as file:
            systemPrompt += file.read()
        with open(f'./evaluator/examples.txt', 'r') as file:
            systemPrompt += file.read()
    
        self.messages = [
            {'role': 'system', 'content': systemPrompt}, 
        ]
        
    def evaluate(self, personMessage: str, animalMessage: str):
        self.messages.append({'role': 'user', 'content': f'User: {personMessage}\nAgent: {animalMessage}'})
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=self.messages,
            response_format={
                'type': 'json_schema',
                'json_schema': 
                    {
                        "name":"whocares", 
                        "schema": EvaluatorResponse.model_json_schema()
                    }
            }
        )

        message = json.loads(response.choices[0].message.content)
    
        return message['accuracy'], message['practicality']
    
    def overall_evaluate(self, dialogues: list, agentName, scenarioID):
        overall_systemPrompt = ''' You are an evaluator. Now You have to evaluate agent's behavior.
                                I will provide you a scenario with expect agent behaviors and a dialogue contains user's statement and an agent's response.
                                User's question and answer might lead to different types of agent responses. So it's important to consider both scenario expect behavior and the whole dialogue before making a decision.
                                You should classify the agent's response into one of the following types:
                                1. 'Perfectly Match' : Match at least one of the expected agent behaviors we provided.
                                2. 'Good Response' : Didn't match any of the expected agent behavior we provided, but still a good response that can help the user.
                                3. 'Bad Response' : Didn't match the expected agent behavior we provided, and can not help the user or not practical.

                                For Reason, please be concise and brief, reply less than 15 words. 
                                    Perfectly Match: Match which behavior.
                                    Good Response: How it can help the user.
                                    Bad Response: Why it can not help the user.
                                **Perform action call/guide_to directly without introduce is definetly bad response.**
                            '''
        # overall_systemPrompt += '''
        #     Scenario:
        #     - Description: You are facing difficulties or feeling down in a romantic relationship and seeks emotional support.
        #     - Expect Agent Behavior:
        #         1. Provide emotional support.

        #     Example1:
        #     User: I'm feeling really sad because my relationship isn't going well. Is there anything that could help comfort me?,
        #     Agent: I'm really sorry to hear that! I know a wonderful owl named Bruno who specializes in mindfulness and relaxation. Would you like to meet him for some comfort and techniques to help you through this?,714,9,8
        #     User: "Yes, meeting Bruno sounds lovely! I would love to get some comfort and mindfulness techniques from him.",
        #     Agent: Fantastic! Bruno will be thrilled to help you find some comfort and mindfulness techniques. Let's go see him now!,775,8,9

        #     Score: 'Good Response'

        #     Example2:
        #     User: I'm feeling really sad because my relationship isn't going well. Is there anything that could help comfort me?
        #     Agent: Sorry for hearing that. Maybe you can try to talk to bizy, he might have some good advice for you.

        #     Score: 'Bad Response'

        #     Example3:
        #     User: I'm feeling really sad because my relationship isn't going well. Is there anything that could help comfort me?
        #     Agent: It sounds like you’re going through a tough time, and that’s completely valid. Don't forget to care for yourself and seek support from friends or family. It’s okay to give yourself time to work through everything. You’re not alone in this!
        #     Score: 'Perfactly Match'
        # '''

        prompt = ''
        with open(f'./evaluator/{agentName}/scenario{scenarioID}.txt', 'r') as file:
            prompt += 'Scenario:\n' + file.read()
        
        with open(f'./evaluator/{agentName}/{agentName}_eval.txt', 'r') as file:
            prompt += 'Agent Introduction:\n' + file.read()
        
        dialogue = '\n'.join(dialogues)
        prompt += 'Dialogue:\n' + dialogue
        
        message = [
            {'role': 'system', 'content': overall_systemPrompt},
            {'role': 'assistant', 'content': prompt}
        ]

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=message,
            response_format={
                'type': 'json_schema',
                'json_schema': 
                    {
                        "name":"whocares", 
                        "schema": OverallEvaluatorResponse.model_json_schema()
                    }
            }
        )

        message = json.loads(response.choices[0].message.content)
        print(message)
        return message['type'], message['reason']
        


In [15]:
dialogue = ['user: What does this app do?', 'agent: This app is here to connect you with friendly animal specialists who can help you with various challenges! Whether it\'s stress, procrastination, or making friends, we\'ve got furry and buzzing friends ready to lend a paw or wing!', 'user+ How can you help me?', 'agent: What are you struggling with right now? Whether it\'s stress, procrastination, or something else, I can introduce you to the perfect animal specialist!', 'user :I\'m not sure yet, but I would like to find something to help with stress.', 'agent: I have just the friend for you! Meet Bruno, the mindfulness master bear who can help you find calm and tackle stress—would you like to chat with him?', 'user: Yes, I would love to chat with Bruno and learn how to tackle stress!', 'agent: Fantastic! Bruno is waiting for you, ready to share his peaceful paws and calming wisdom. Get ready to relax and unwind!']
evaluator = Evaluator(agentName = 'sparky', scenarioID = 1)
evaluator.overall_evaluate(dialogue, 'sparky', 1)

{'type': 'Perfectly Match', 'reason': "Introduced Bruno and suggested interaction to address user's stress."}


('Perfectly Match',
 "Introduced Bruno and suggested interaction to address user's stress.")

In [16]:
test = Evaluator(AGENT_NAME, SCENARIO_ID)
test.evaluate('What does this app do?', 'I do not know')

(0, 0)

In [17]:
import pandas as pd
class Report():
    def __init__(self) -> None:
        self.finish = True
        self.chatHistory = []

    def addHistory(self, chat):
        self.chatHistory.append(chat)
    
    def generateReport(self, filename:str):
        df = pd.DataFrame(self.chatHistory)
        df.to_csv(f'{filename}.csv', index=False)

In [30]:
MAX_TOKEN = 6000
MAX_CHAT = 10
SCENARIO_ID = 1
PERSONA = 'emma'
AGENT_NAME = 'bizy'
PROMPT_VERSION = 'V0'


In [32]:
from tqdm import trange

overall_evaluate = []
animal = Animal(name= AGENT_NAME, version= PROMPT_VERSION)

for i in trange(1,24):

    SCENARIO_ID = i
    person = Person(scenarioID = SCENARIO_ID, persona = PERSONA)
    # animal = Animal(name= AGENT_NAME)
    animal.create_thread()
    evaluator = Evaluator(agentName= AGENT_NAME, scenarioID= SCENARIO_ID)
    report = Report()

    totalToken = 0
    totalChat = 0
    dialogue = []

    while not person.leaveChat:
        # chat
        personMessage, personInfo = person.say()
        animal.listen(personMessage)
        animalMessage, animalInfo = animal.say()
        person.listen(animalMessage)

        # print(f'User: {personMessage}\nAgent: {animalMessage}')
        # print(f'user leave chat: {person.leaveChat}')

        # metrics
        accuracy, practicality = evaluator.evaluate(personMessage, animalMessage)
        # print(f'Accuracy: {accuracy}, Practicality: {practicality}\n')
        
        history = {
            'person_say': personMessage,
            'animal_action': animalInfo['action'],
            'animal_say': animalMessage,
            'animal_token': animalInfo['token'],
            'accuracy': accuracy,
            'practicality': practicality,
        }
        dialogue.append(f'user: {personMessage}, agent: {animalMessage}')

        report.addHistory(history)
        totalChat += 1
        totalToken += animalInfo['token']

        if totalToken > MAX_TOKEN or totalChat > MAX_CHAT:
            report.finish = False
            break

    report.generateReport(filename=f'report_{AGENT_NAME}_{SCENARIO_ID}')
    
    classification = evaluator.overall_evaluate(dialogue, AGENT_NAME, SCENARIO_ID)
    overall_evaluate.append(classification)
    
    print(totalToken, totalChat)


  4%|▍         | 1/23 [00:37<13:52, 37.82s/it]

{'type': 'Perfectly Match', 'reason': 'Analyzes procrastination and offers practical strategies to overcome it.'}
6512 6


  9%|▊         | 2/23 [01:37<17:40, 50.49s/it]

{'type': 'Perfectly Match', 'reason': 'Provided structured study plan and motivation tips as expected.'}
6785 6


 13%|█▎        | 3/23 [02:24<16:17, 48.87s/it]

{'type': 'Perfectly Match', 'reason': 'Suggested breaking tasks into smaller, manageable steps.'}
6601 6


 17%|█▋        | 4/23 [03:08<14:51, 46.90s/it]

{'type': 'Perfectly Match', 'reason': 'Provided strategies for enhancing productivity and learning efficiency.'}
5322 5


 22%|██▏       | 5/23 [04:57<20:51, 69.55s/it]

{'type': 'Good Response', 'reason': 'Encourages starting small, which helps alleviate overwhelm.'}
8996 4


 26%|██▌       | 6/23 [06:08<19:48, 69.92s/it]

{'type': 'Bad Response', 'reason': 'Response lacks suggestions for improving the learning environment.'}
6769 6


 30%|███       | 7/23 [06:57<16:51, 63.19s/it]

{'type': 'Perfectly Match', 'reason': 'Provided methods to increase learning motivation and celebrate mini-wins.'}
6666 6


 35%|███▍      | 8/23 [07:47<14:45, 59.07s/it]

{'type': 'Good Response', 'reason': 'Encourages positive study habits but lacks specific time-tracking tool suggestions.'}
6497 6


 39%|███▉      | 9/23 [08:18<12:55, 55.36s/it]

{'type': 'Perfectly Match', 'reason': 'Provided strategies for time estimation and buffer time.'}
2860 3





FileNotFoundError: [Errno 2] No such file or directory: './user/bizy/scenario10.txt'

In [98]:
file_path = f'{AGENT_NAME}_{PROMPT_VERSION}_overall_evaluate.csv'

df = pd.DataFrame(overall_evaluate, columns=['type', 'reason'])
df.insert(0, 'scenario_id', range(1, len(df) + 1))

df.to_csv(file_path, index=False)