In [26]:
import copy
import json
import os
import numpy
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union

from openai import BadRequestError

import autogen
from autogen import config_list_from_json
from autogen.agentchat import Agent, AssistantAgent, UserProxyAgent
from autogen.agentchat.contrib.agent_optimizer import AgentOptimizer
from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent
from autogen.code_utils import extract_code
from autogen.math_utils import get_answer

## Data

In [27]:
def read_file(url):
    file = open(url, "r")
    data = file.read().split('\n')
    return data

In [28]:
phoMT_dev_en = read_file("data/PhoMT/detokenization/dev/dev.en")
phoMT_dev_vi = read_file("data/PhoMT/detokenization/dev/dev.vi")
phoMT_test_en = read_file("data/PhoMT/detokenization/test/test.en")
phoMT_test_vi = read_file("data/PhoMT/detokenization/test/test.vi")
phoMT_train_en = read_file("data/PhoMT/detokenization/train/train.en")
phoMT_train_vi = read_file("data/PhoMT/detokenization/train/train.vi")

In [29]:
phoMT_dev_en[0] = phoMT_dev_en[0][1:]

In [30]:
# phoMT_train_vi

In [31]:
phoMT_dev_envi = [];
for index in range(len(phoMT_dev_en)):
    phoMT_dev_envi.append({"question":phoMT_dev_en[index], "answer": phoMT_dev_vi[index]})
# phoMT_dev_envi

In [32]:
phoMT_test_envi = [];
for index in range(len(phoMT_test_en)):
    phoMT_test_envi.append({"question": phoMT_test_en[index], "answer": phoMT_test_vi[index]})

In [33]:
phoMT_train_envi = [];
for index in range(len(phoMT_train_en)):
    phoMT_train_envi.append({"question": phoMT_train_en[index], "answer": phoMT_train_vi[index]})

In [34]:
phoMT_train_envi[0]["question"]

'It begins with a countdown.'

## Benchmark

In [35]:
from comet import download_model, load_from_checkpoint

# Choose your model from Hugging Face Hub
# model_path = download_model("Unbabel/XCOMET-XL")
# or for example:
# model_path = download_model("Unbabel/wmt22-comet-da")

# Load the model checkpoint:
model = load_from_checkpoint('./XCOMET-XL/checkpoints/model.ckpt')

Encoder model frozen.
/home/kaylous/workspace/ics/llms/.venv/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


In [11]:
data = [
    {
        "src": "Boris Johnson teeters on edge of favour with Tory MPs", 
        "mt": "Boris Johnson ist bei Tory-Abgeordneten völlig in der Gunst", 
        "ref": "Boris Johnsons Beliebtheit bei Tory-MPs steht auf der Kippe"
    }
]
model_output = model.predict(data, batch_size=8, gpus=1)
# Segment-level scores
# System-level score
# Score explanation (error spans)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:18<00:00, 18.28s/it]


In [12]:
model_output.scores

[0.45751163363456726]

In [13]:
model_output.system_score

0.45751163363456726

In [14]:
model_output.metadata.error_spans

[[{'text': 'ist bei',
   'confidence': 0.40954869985580444,
   'severity': 'critical',
   'start': 13,
   'end': 21},
  {'text': 'Abgeordnete',
   'confidence': 0.27366378903388977,
   'severity': 'major',
   'start': 27,
   'end': 38},
  {'text': 'völlig in der Gunst',
   'confidence': 0.5219234228134155,
   'severity': 'critical',
   'start': 39,
   'end': 59}]]

In [36]:
def get_score(src, ans, res):
    data = [
        {
            "src": src,
            "mt" : res,
            "ref": ans
        }
    ]
    return model.predict(data, batch_size=8, gpus=1).system_score

## Agent init

### Custom UserProxyAgent

In [None]:
def is_termination_msg_mathchat(message):
    """Check if a message is a termination message."""
    if isinstance(message, dict):
        message = message.get("content")
        if message is None:
            return False
    if message.rstrip().find("TERMINATE") >= 0:
        return True
    return False


class JudgeProxyAgent(UserProxyAgent):
    MAX_CONSECUTIVE_AUTO_REPLY = 10
    DEFAULT_REPLY = "Create a refined prompt that will help the model generate a response more closely resembling the style, detail, and tone of the provided answer: {answer}. Focus on specifying key elements to capture the nuances of this answer effectively."
    PROMPTS = """Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    The text:
    """

    def __init__(
        self,
        name: Optional[str] = "JudgeChatAgent",
        # is_termination_msg: Optional[Callable[[Dict], bool]] = is_termination_msg_mathchat,
        human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
        use_docker= "False",
        # default_auto_reply: Optional[Union[str, Dict, None]] = DEFAULT_REPLY,
        # max_invalid_q_per_step=3,
        **kwargs,
    ):
        super().__init__(
            name=name,
            # is_termination_msg=is_termination_msg,
            human_input_mode=human_input_mode,
            # default_auto_reply=default_auto_reply,
            # max_invalid_q_per_step=max_invalid_q_per_step,
            **kwargs,
        )
        self.register_reply(
            trigger=autogen.ConversableAgent, reply_func=JudgeProxyAgent._check_final_result, position=0
        )
        self.max_function_call_trial = 3
        self.query = None
        self._answer = None
        self.is_correct = None

    def initiate_chat(
        self,
        recipient,
        # recipient2,
        answer: None,
        silent: Optional[bool] = False,
        **context,
    ):
        self.query = context["problem"]
        self._answer = answer
        self.is_correct = None

        self._prepare_chat(recipient, True)
        error_message = None
        try:
            prompt = self.PROMPTS + context["problem"]
            # recipient.initiate_chat(recipient=recipient2, answer=self._answer, problem=prompt)
            self.send(prompt, recipient, silent=silent)
        except BadRequestError as e:
            error_message = str(e)
            self.is_correct = 0
            print("error information: {}".format(error_message))

        recipient.reset()
        is_correct = copy.deepcopy(self.is_correct)
        self._reset()
        return is_correct

    def _check_final_result(
        self,
        messages: Optional[List[Dict]] = None,
        sender: Optional[autogen.Agent] = None,
        config: Optional[Any] = None,
    ):
        messages = messages[-1]
        if isinstance(messages, dict):
            messages = messages.get("content")
            if messages is None:
                return False, None
            if (messages.find("\n") >= 0):
                print("Response longer than expected?\n" + messages)
                messages = messages.split("\n")[0]

        self.is_correct = get_score(self.query, messages, self._answer)
        print("Score: " + self.is_correct)
        if (self.is_correct >= 0.9):
            return True, "The result is passable. Please reply me with TERMINATE."
        return False, None

    def _reset(self):
        super()._reset()
        self.max_function_call_trial = 3
        self.is_correct = None
        self.query = None
        self._answer = None

### Agents declarations

In [37]:
llm_config = {
    "config_list": [
        {
            "model": "llama3",
            "base_url": "http://localhost:11434/v1",
            "api_key": "ollama",
        }
    ]
}

PromptGenerator = autogen.AssistantAgent(
    name="PromptGenerator",
    system_message="You are a prompt engineer. Your only job is to provide a single prompt to a LLM agent to translate.",
    llm_config=llm_config,
    human_input_mode = "NEVER",
)
LLM = autogen.AssistantAgent(
    name="LLM",
    system_message="You are a translator. Your only job is to translate according to the given prompt",
    llm_config=llm_config,
    human_input_mode = "NEVER",
)
Judge = autogen.UserProxyAgent(
    name="Judge",
    # system_message="You are a judge. Your job is to make the translated result looks closest to the answer.",
    code_execution_config=False,
    human_input_mode = "NEVER",
    # llm_config=llm_config,
)
Editor = autogen.AssistantAgent(
    name="Editor",
    system_message="You are an advisor. Your job is to provide guidance.",
    llm_config=llm_config,
    human_input_mode = "NEVER",
)

# groupchat = autogen.GroupChat(
#     agents=[Judge, PromptGenerator, LLM],
#     messages=[],
#     max_round=5,
#     speaker_selection_method="round_robin"
# )

# manager = autogen.GroupChatManager(
#     groupchat=groupchat,
#     llm_config=llm_config
# )
# user_proxy = autogen.UserProxyAgent(
#     name="Userproxyagent",
#     human_input_mode="NEVER",
#     code_execution_config={"work_dir": "_output", "use_docker": False},
# )

### Test

In [None]:
# result = Judge.initiate_chat(
#     recipient = manager,
#     max_turns = 5,
#     message = """Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
#     The text:""" + phoMT_dev_envi[0]["question"],
#     answer = phoMT_dev_envi[0]["answer"]
# )

# prompt_1 = Judge.initiate_chat(
#     recipient = PromptGenerator,
#     max_turns = 1,
#     message = "Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: " + phoMT_dev_envi[0]["question"],
# )

# output = Judge.initiate_chat(
#     recipient = LLM,
#     max_turns = 1,
#     message = prompt_1.summary + " Say nothing other than the translated result, and give me no notes."
# )


In [60]:
# turns = 1
# while (turns > 0):
#     turns = turns - 1;
#     hint = Judge.initiate_chat(
#         recipient = Editor,
#         max_turns = 1,
#         message = f'Analyze the current prompt {prompt_1.summary}, the original sentence {phoMT_dev_envi[0]["question"]}, the expected translation {phoMT_dev_envi[0]["answer"]}, and the generated translation {output.summary}. Identify shortcomings in the current prompt and provide advice on how to improve it to guide the model toward producing translations more closely aligned with {phoMT_dev_envi[0]["answer"]}. Focus on enhancing clarity, specificity, and context awareness in the prompt instructions.'
#     )
#     prompt_2 = Judge.initiate_chat(
#         recipient = PromptGenerator,
#         max_turns = 1,
#         message = hint.summary,
#     )
    
#     output = Judge.initiate_chat(
#         recipient = LLM,
#         max_turns = 1,
#         message = prompt_2.summary
#     )

#     score = get_score(phoMT_dev_envi[0]["question"],phoMT_dev_envi[0]["answer"],output.summary)
#     print(score)
    # is_correct = Judge.initiate_chat(recipient=PromptGenerator, answer=phoMT_dev_envi[0]["answer"], problem=phoMT_dev_envi[0]["question"])
    # print(is_correct)

[33mJudge[0m (to Editor):

Analyze the current prompt "Translate the following English sentence into natural Vietnamese, preserving its meaning, tone, and context: 'Hurricane Dorian, one of the most powerful storms ever recorded in the Atlantic Ocean, made landfall as a Category 5 storm on Great Abaco Island in the northern Bahamas on Sunday morning, September 1, 2019.'", the original sentence Hurricane Dorian, one of the most powerful storms ever recorded in the Atlantic Ocean, made landfall as a Category 5 storm on Great Abaco Island in the northern Bahamas on Sunday morning, September 1, 2019., the expected translation Vào chủ nhật ngày 1-9-2019, cơn bão Dorian, một trong những cơn bão mạnh nhất được ghi nhận ở Đại Tây Dương, với sức gió 362 km/h đổ bộ vào đảo Great Abaco, miền bắc Bahamas., and the generated translation Cơn bão Dorian, một cơn bão mạnh nhất được ghi nhận trong Đại Tây Dương, đã đổ bộ như một cơn bão hạng 5 vào đảo Great Abaco thuộc quần đảo Bahamas phía bắc vào s

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mPromptGenerator[0m (to Judge):

Excellent suggestions!

I completely agree with your recommendations for improving the prompt. By including specific dates, providing additional contextual information, and maintaining a formal tone, we can greatly enhance the model's ability to generate a more accurate and contextually relevant translation.

Here is the updated prompt:

"Translate the following English sentence into natural Vietnamese, preserving its meaning, tone, and context: 'Hurricane Dorian, one of the most powerful storms ever recorded in the Atlantic Ocean, made landfall as a Category 5 storm on Great Abaco Island in the northern Bahamas on September 1, 2019. On that day, winds reached speeds of approximately 362 km/h.'"

Thank you for your insightful analysis and suggestions!

--------------------------------------------------------------------------------
[33mJudge[0m (to LLM):

Excellent suggestions!

I completely agree with your recommendations for improving the promp

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mLLM[0m (to Judge):

"Cơn bão Dorian, một trong những cơn bão mạnh nhất từng được ghi nhận tại Đại Tây Dương, đã đổ bộ như một cơn bão hạng 5 vào đảo Great Abaco tại quần đảo Bahamas bắc trên ngày 1 tháng 9 năm 2019. Vào ngày đó, gió đạt tốc độ khoảng 362 km/h."

--------------------------------------------------------------------------------


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.01s/it]


0.7464637756347656


### Agent pairing

In [64]:
default_prompt_for_trimming = "Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task."

def translate(question, answer, turns = 0):
    prompt_1 = Judge.initiate_chat(
        recipient = PromptGenerator,
        max_turns = 1,
        message = "Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: " + question,
    )
    
    output = Judge.initiate_chat(
        recipient = LLM,
        max_turns = 1,
        message = prompt_1.summary + default_prompt_for_trimming
    )
    rt_score = get_score(question, answer, output.summary)
    print(rt_score)
    while (turns > 0):
        turns = turns - 1;
        hint = Judge.initiate_chat(
            recipient = Editor,
            max_turns = 1,
            message = f'Analyze the original sentence {question}, the expected translation {answer}, and the generated translation {output.summary}. Provide advice on how to guide the model toward producing translations more closely aligned with {answer}. Focus on enhancing clarity, specificity, and context awareness in the prompt instructions. ' + default_prompt_for_trimming,
        )
        prompt_2 = hint.summary
        if (prompt_2.find(':') != -1):
            prompt_2 = prompt_2[prompt_2.find(':')+1:]
        # prompt_2 = Judge.initiate_chat(
        #     recipient = PromptGenerator,
        #     max_turns = 1,
        #     message = hint.summary + "Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task.",
        # )
        
        output = Judge.initiate_chat(
            recipient = LLM,
            max_turns = 1,
            message = prompt_2 + default_prompt_for_trimming,
        )
    
        score = get_score(question,answer,output.summary)
        print(score)
        rt_score = score
    return rt_score
    

In [65]:
huh = translate(phoMT_dev_envi[4]["question"], phoMT_dev_envi[4]['answer'], 1)

[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: At this time, there have been no reported injuries among the 46 publishers in the two congregations on Great Abaco Island.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence into Vietnamese while maintaining the original tone and meaning, ensuring the output sounds natural for native Vietnamese speakers: "At this time, there have been no reported injuries among the 46 publishers in the two congregations on Great Abaco Island."

------------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.29s/it]


0.6894360780715942
[33mJudge[0m (to Editor):

Analyze the original sentence At this time, there have been no reported injuries among the 46 publishers in the two congregations on Great Abaco Island., the expected translation Theo báo cáo đến thời điểm hiện tại, trong 46 người công bố thuộc hai hội thánh ở đảo Great Abaco thì không có anh chị nào bị thương., and the generated translation "Đến lúc này, không có báo cáo bị thương nào được ghi nhận trong số 46 người xuất bản tham gia hai giáo xứ trên đảo Abaco Grande.". Provide advice on how to guide the model toward producing translations more closely aligned with Theo báo cáo đến thời điểm hiện tại, trong 46 người công bố thuộc hai hội thánh ở đảo Great Abaco thì không có anh chị nào bị thương.. Focus on enhancing clarity, specificity, and context awareness in the prompt instructions. Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and on

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.37s/it]


0.874899685382843


In [66]:
huh

0.874899685382843

In [72]:
score_dev = []
for i in range(10):
    x = phoMT_dev_envi[i]
    score_dev.append(translate(x['question'], x['answer'], 0))
score_test = []
for i in range(10):
    x = phoMT_test_envi[i]
    score_test.append(translate(x['question'], x['answer'], 0))

[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Hurricane Dorian, one of the most powerful storms ever recorded in the Atlantic Ocean, made landfall as a Category 5 storm on Great Abaco Island in the northern Bahamas on Sunday morning, September 1, 2019.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate the following sentence from English to Vietnamese while capturing its original tone and meaning: "Hurricane Dorian, one of the most powerful storms ever recorded in the Atlantic Ocean, made landfall as a Category 5 storm on Great Ab

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.78s/it]


0.6979159116744995
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Dorian is especially dangerous due to its slow movement, high wind speeds, and heavy rains.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence into Vietnamese while preserving the tone and meaning of the original, ensuring a natural-sounding output for native Vietnamese speakers: "Dorian là đặc biệt nguy hiểm do tốc độ di chuyển chậm, gió mạnh và mưa lớn."

--------------------------------------------------------------------------------
[33mJud

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.34s/it]


0.8443493843078613
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: The storm passed by the Leeward Islands, Puerto Rico, and the Virgin Islands as a tropical storm with little or no reported damage.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence to Vietnamese while preserving its original tone and meaning, aiming for a natural output that native Vietnamese speakers would find suitable: "The storm passed by the Leeward Islands, Puerto Rico, và đảoVirgin Islands như một cơn bão nhiệt đới với rất ít hoặc không

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.51s/it]


0.8997992873191833
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: The United States branch office continues to gather information while monitoring the storm's impact on our brothers and also on branch - owned properties.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this sentence from English to Vietnamese: "Cơ quan chi nhánh Hoa Kỳ tiếp tục thu thập thông tin trong khi theo dõi tác động của cơn bão trên anh em của chúng tôi và cũng trên các tài sản do chi nhánh sở hữu."

-------------------------------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.62s/it]


0.9221010804176331
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: At this time, there have been no reported injuries among the 46 publishers in the two congregations on Great Abaco Island.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence into Vietnamese while maintaining the original tone and meaning, ensuring the output sounds natural for native Vietnamese speakers: "At this time, there have been no reported injuries among the 46 publishers in the two congregations on Great Abaco Island."

-----------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.30s/it]


0.6894360780715942
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: However, the only Kingdom Hall on the island was destroyed.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Please translate this English sentence into Vietnamese while considering the cultural context and ensuring the tone and meaning remain intact, producing an output that sounds natural for native Vietnamese speakers: "However, the only Kingdom Hall on the island was destroyed."

--------------------------------------------------------------------------------
[33mJudge

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.50s/it]


0.8660420179367065
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: On Grand Bahama Island, there are four congregations and 364 publishers.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence into Vietnamese while conveying its original context and tone, taking care to produce a natural-sounding output that native Vietnamese speakers would find accurate and fluent: "On Grand Bahama Island, there are four congregations and 364 publishers."

-------------------------------------------------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.54s/it]


0.6848405003547668
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Initial reports indicate that 196 of our brothers are displaced and 22 homes have sustained damage.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence into Vietnamese while preserving tone, meaning, and context, producing a natural-sounding translation that native speakers would understand: "Initial reports indicate that 196 of our brothers are displaced and 22 homes have sustained damage."

------------------------------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.86s/it]


0.8475453853607178
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Three homes have been destroyed.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence into Vietnamese while preserving its original tone and meaning, taking into account the cultural context of Vietnam, to produce a translation that sounds natural and accurate when spoken by a native Vietnamese speaker: "Three homes have been destroyed."

--------------------------------------------------------------------------------
[33mJudge[0m (to LLM):

Tra

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.01s/it]


0.9862620234489441
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: The branch provided instruction in advance of the storm to local circuit overseers and elders in the affected areas.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence into Vietnamese while preserving the original tone and meaning, taking into account the nuances of Vietnamese language and culture. Produce a natural-sounding translation that would be suitable for native speakers: "The branch provided instruction in advance of the storm to local 

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.06s/it]


0.7204471230506897
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Brother Albert Barnett and his wife, Sister Susan Barnett, from the West Congregation in Tuscaloosa, Alabama

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

"Translate this English sentence into Vietnamese while considering context, tone, and meaning: 'Brother Albert Barnett và vợ của ông, Chị Susan Barnett, từ Phân hội Tây ở Tuscaloosa, Alabama.' Pay close attention to maintaining nuance and idiomatic expressions."

---------------------------------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.74s/it]


0.7495207190513611
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Severe storms ripped through parts of the southern and midwestern United States on January 11 and 12, 2020.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

"Translate this English sentence into Vietnamese while maintaining its original tone and meaning, ensuring a context-sensitive and natural-sounding translation for native Vietnamese speakers: Severe storms ripped through parts of the southern and midwestern United States on January 11 and 12, 2020."

--------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.92s/it]


0.9515814781188965
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Two days of heavy rain, high winds, and numerous tornadoes caused major damage across multiple states.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate the following sentence from English to Vietnamese while maintaining the original tone, meaning, and context: "Two days of heavy rain, high winds, and numerous tornadoes caused major damage across multiple states."

--------------------------------------------------------------------------------
[33mJudge[0m (to LL

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.90s/it]


0.9704889059066772
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Sadly, Brother Albert Barnett and his wife, Sister Susan Barnett, 85 and 75 years old respectively, were killed when a tornado struck their mobile home.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Please translate this sentence from English to Vietnamese while preserving its original tone, context, and meaning, ensuring the translation sounds natural for native speakers: "Sadly, Brother Albert Barnett và vợ ông, Sister Susan Barnett, respectfully là 85 và 75 tuổi, đã bị

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.67s/it]


0.27348989248275757
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: The United States branch also reports that at least four of our brothers' homes sustained minor damage, along with two Kingdom Halls.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate the given English sentence into Vietnamese while maintaining its original tone and meaning, ensuring a natural-sounding translation that is context-sensitive and accurate for native Vietnamese speakers.

--------------------------------------------------------------------------------


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.29s/it]


0.20169812440872192
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Additionally, the storms caused major damage to a brother's business property.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate this English sentence: "Additionally, the storms caused major damage to a brother's business property." into a natural-sounding Vietnamese sentence that accurately conveys the tone and meaning of the original sentence, considering the nuances of context and cultural relevance for native Vietnamese speakers.

------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.65s/it]


0.9630578756332397
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: Local elders and the circuit overseer are offering practical and spiritual support to those affected by this disaster.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate the following sentence into Vietnamese while preserving tone and meaning, aiming for a natural-sounding output suitable for native speakers: "Local elders and the circuit overseer are offering practical and spiritual support to those affected by this disaster."

--------------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.13s/it]


0.7122097611427307
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: We know that our heavenly Father, Jehovah, is providing comfort to our brothers and sisters who are grieving because of this tragedy.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate the following sentence from English to Vietnamese while preserving its original tone and meaning: "We know that our heavenly Father, Jehovah, is providing comfort to our brothers and sisters who are grieving because of this tragedy."

---------------------------------------------------

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.45s/it]


0.8728129863739014
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: International government agencies and officials have responded to Russia's Supreme Court decision that criminalizes the worship of Jehovah's Witnesses in Russia.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Translate the following English sentence into Vietnamese while preserving the tone, meaning, and nuance, aiming for a natural-sounding output that would be comprehensible to native Vietnamese speakers: "International government agencies and officials have responded to

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.60s/it]


0.8588100671768188
[33mJudge[0m (to PromptGenerator):

Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: These statements have criticized Russia's unjust and harsh judicial action against a minority religious group known for peaceful religious activity.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Please translate the following English sentence into Vietnamese while maintaining its original tone and meaning, ensuring a natural-sounding output that native speakers would appreciate: "These statements have criticized Russia's unjust and harsh judicial action against a minority

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.65s/it]


0.957837700843811


In [73]:
numpy.average(score_dev)

0.8158738791942597

In [74]:
score_dev

[0.6979159116744995,
 0.8443493843078613,
 0.8997992873191833,
 0.9221010804176331,
 0.6894360780715942,
 0.8660420179367065,
 0.6848405003547668,
 0.8475453853607178,
 0.9862620234489441,
 0.7204471230506897]

In [75]:
numpy.average(score_test)

0.7511507511138916

In [76]:
score_test

[0.7495207190513611,
 0.9515814781188965,
 0.9704889059066772,
 0.27348989248275757,
 0.20169812440872192,
 0.9630578756332397,
 0.7122097611427307,
 0.8728129863739014,
 0.8588100671768188,
 0.957837700843811]

In [15]:
test_score = []
for i in range(10):
    x = phoMT_test_envi[i]
    prompt_1 = Judge.initiate_chat(
        recipient = PromptGenerator,
        max_turns = 1,
        silent=True,
        message = "Create a prompt that instructs a Large Language Model to translate a sentence from English to Vietnamese. The prompt should guide the model to produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers. Say nothing other than the prompt, and give me no notes. The text: " + x["question"],
    )
    
    output = Judge.initiate_chat(
        recipient = LLM,
        max_turns = 1,
        silent=True,
        message = prompt_1.summary + " Say nothing other than the translated result, and give me no notes."
    )
    # get_score(x["question"],x["answer"],output.summary)
    test_score.append(get_score(x["question"],x["answer"],output.summary))

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.44s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.85s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.24s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.72s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.52s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.97s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.57s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.21s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.67s/it]
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.03s/it]


In [17]:
# numpy.average(test_score)
score

[0.9600645303726196,
 0.811298131942749,
 0.6998796463012695,
 0.8009799718856812,
 0.563008189201355,
 0.8376463651657104,
 0.7987921833992004,
 0.784834086894989,
 0.20421035587787628,
 0.761147677898407,
 0.7218590974807739,
 0.9796053171157837,
 0.9803764820098877,
 0.3576646149158478,
 0.7130599617958069,
 0.7944188714027405,
 0.6968633532524109,
 0.912351131439209,
 0.7582926750183105,
 0.9653881192207336]

## 

## Improve

In [None]:
EPOCH = 10
optimizer_model = "gpt-4-1106-preview"
optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config)
for i in range(EPOCH):
    for index, query in enumerate(train_data):
        is_correct = user_proxy.initiate_chat(assistant, answer=query["answer"], problem=query["question"])
        history = assistant.chat_messages_for_summary(user_proxy)
        optimizer.record_one_conversation(history, is_satisfied=is_correct)
    register_for_llm, register_for_exector = optimizer.step()
    for item in register_for_llm:
        assistant.update_function_signature(**item)
    if len(register_for_exector.keys()) > 0:
        user_proxy.register_function(function_map=register_for_exector)

## Compare

In [None]:
sum = 0
for index, query in enumerate(test_data):
    is_correct = user_proxy.initiate_chat(recipient=assistant, answer=query["answer"], problem=query["question"])
    sum += is_correct
success_rate_with_agent_training = sum / 10