In [1]:
import copy
import json
import os
import numpy
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union

from openai import BadRequestError

import autogen
from autogen import UserProxyAgent, AssistantAgent, ConversableAgent
from autogen import config_list_from_json
from autogen.agentchat import Agent
from autogen.agentchat.contrib.agent_optimizer import AgentOptimizer
from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent
from autogen.code_utils import extract_code
from autogen.math_utils import get_answer

## Data

In [2]:
def read_file(url):
    file = open(url, "r")
    data = file.read().split('\n')
    return data

In [3]:
phoMT_dev_en = read_file("data/PhoMT/detokenization/dev/dev.en")
phoMT_dev_vi = read_file("data/PhoMT/detokenization/dev/dev.vi")
phoMT_test_en = read_file("data/PhoMT/detokenization/test/test.en")
phoMT_test_vi = read_file("data/PhoMT/detokenization/test/test.vi")
phoMT_train_en = read_file("data/PhoMT/detokenization/train/train.en")
phoMT_train_vi = read_file("data/PhoMT/detokenization/train/train.vi")

In [4]:
phoMT_dev_en[0] = phoMT_dev_en[0][1:]

In [5]:
phoMT_dev_envi = [];
for index in range(len(phoMT_dev_en)):
    phoMT_dev_envi.append({"question":phoMT_dev_en[index], "answer": phoMT_dev_vi[index]})
phoMT_test_envi = [];
for index in range(len(phoMT_test_en)):
    phoMT_test_envi.append({"question": phoMT_test_en[index], "answer": phoMT_test_vi[index]})
phoMT_train_envi = [];
for index in range(len(phoMT_train_en)):
    phoMT_train_envi.append({"question": phoMT_train_en[index], "answer": phoMT_train_vi[index]})
# phoMT_dev_envi

In [6]:
phoMT_dev_envi[0]["question"]

'Hurricane Dorian, one of the most powerful storms ever recorded in the Atlantic Ocean, made landfall as a Category 5 storm on Great Abaco Island in the northern Bahamas on Sunday morning, September 1, 2019.'

## Benchmark

### COMET

In [7]:
from comet import download_model, load_from_checkpoint

# Load the model checkpoint:
model = load_from_checkpoint('./XCOMET-XL/checkpoints/model.ckpt')

Encoder model frozen.
/home/kaylous/workspace/ics/llms/.venv/lib/python3.11/site-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


In [8]:
def get_score_comet(src, ans, res):
    data = [
        {
            "src": src,
            "mt" : res,
            "ref": ans
        }
    ]
    return model.predict(data, batch_size=8, gpus=1).system_score

### SacreBleu

In [9]:
from sacrebleu.metrics import BLEU, CHRF, TER
bleu = BLEU()

def get_score(src, ans, res):
     return bleu.corpus_score([ans], [[res]]).score

## Agent init

### Agents declarations

In [10]:
llm_config = {
    "config_list": [
        {
            "model": "llama3",
            "base_url": "http://localhost:11434/v1",
            "api_key": "ollama",
        }
    ]
}

# user_proxy = autogen.UserProxyAgent(
#     name="Userproxyagent",
#     human_input_mode="NEVER",
#     code_execution_config={"work_dir": "_output", "use_docker": False},
# )

### Custom UserProxyAgent

In [11]:
class JudgeProxyAgent(UserProxyAgent):
    MAX_CONSECUTIVE_AUTO_REPLY = 10
    DEFAULT_REPLY_TEMPLATE = "Generate a response more closely resembling the style, detail, and tone of the provided answer. Focus on specifying key elements to capture the nuances of this answer effectively. The answer: "
    PROMPTS = """Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task.
    The text:
    """

    def __init__(
        self,
        name: Optional[str] = "JudgeChatAgent",
        human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
        **kwargs,
    ):
        super().__init__(
            name=name,
            human_input_mode=human_input_mode,
            **kwargs,
        )
        self.register_reply(
            trigger=ConversableAgent, reply_func=JudgeProxyAgent._check_final_result, position=0
        )
        self.max_function_call_trial = 3
        self.query = None
        self._answer = None
        self.is_correct = None
        self.previous = None
        # self.history = []

    def initiate_chat(
        self,
        recipient,
        history: List[Dict] = [],
        answer=None,
        previous=None,
        silent: Optional[bool] = False,
        max_turns = 3,
        **context,
    ):
        self.query = context["message"]
        self._answer = answer
        self.previous = previous
        
        self.is_correct = None
        self.max_function_call_trial = max_turns
        recipient.history = history
        print("Recipient.history: " + str(len(recipient.history)))
        
        self._prepare_chat(recipient, True)
        
        # for msg in history:
        #     self._append_oai_message(
        #         message=msg['content'],
        #         role=msg['role'],
        #         conversation_id = recipient if msg['role'] == "user" else self
        #     )
        
        error_message = None
        try:
            prompt = self.PROMPTS + context['message']
            self.send(prompt, recipient, silent=silent)
        except BadRequestError as e:
            error_message = str(e)
            self.is_correct = 0
            print("error information: {}".format(error_message))

        recipient.reset()
        self.is_correct = copy.deepcopy(self.is_correct)
        result = self.is_correct
        self._reset()
        return result

    def receive(
        self,
        message: Union[Dict, str],
        sender: Agent,
        request_reply: Optional[bool] = None,
        silent: Optional[bool] = False,
    ):
        self._process_received_message(message, sender, silent)
        if request_reply is False or request_reply is None and self.reply_at_receive[sender] is False:
            return

        self.is_correct = self.chat_messages[sender][-1].get("content")
        if self._answer is not None:
            if (get_score(self.query, self.is_correct, self._answer) >= 90):
                return

        self.max_function_call_trial = self.max_function_call_trial - 1
        if (self.max_function_call_trial <= 0):
            self.max_function_call_trial = 0
            return

        # reply = f"Using the original sentence: {self.query}, provide guidance to improve the quality of {self.is_correct}. Focus on enhancing accuracy, tone, fluency, and contextual appropriateness."
        reply = f"Translate the sentence: {self.query} into Vietnamese, ensuring that when the translation is appended to the previous translated paragraph: {self.previous}, the entire paragraph remains coherent, meaningful, and contextually appropriate. Respond with only the translated sentence, without any additional commentary or explanation."
        if self._answer is not None:
            reply = f'Analyze the original sentence: {self.query}, the expected Vietnamese translation: {self._answer}, and the generated translation: {self.is_correct}. Identify the differences between {self.is_correct} and {self._answer}, and provide guidance to improve the translation so it aligns more closely with {self._answer}. Focus on preserving meaning, tone, style, and naturalness in Vietnamese while addressing any discrepancies.'
        # if self._answer is not None:
        #     reply = self.generate_reply(messages=self.chat_messages[sender], sender=sender)
        self.send(reply, sender, silent=silent)

    def _check_final_result(
        self,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ):
        messages = messages[-1]
        if isinstance(messages, dict):
            messages = messages.get("content")
            if messages is None:
                return False, None
            if (messages.find("\n") >= 0):
                print("Response longer than expected?\n" + messages)
                # messages = messages.split("\n")[0]

        self.is_correct = messages
        temp_score = get_score(self.query, messages, self._answer)
        print("Score: " + str(temp_score))
        if (temp_score >= 90):
            return True, "The result is passable. Please reply me with the same answer as before."
        return False, None

    def _reset(self):
        # super()._reset()
        self.max_function_call_trial = 0
        self.is_correct = None
        self.query = None
        self._answer = None
        self.previous = None

In [12]:
class PromptAssistant(AssistantAgent):
    PROMPTS = """Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task.
    The text:
    """
    TRIMMING_PROMPT = "Respond with only the improved translation, without any additional explanations or commentary."
    def __init__(
        self,
        after: Agent,
        before: Agent = None,
        name: Optional[str] = "PromptAssistantAgent",
        human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
        **kwargs,
    ):
        super().__init__(
            name=name,
            human_input_mode=human_input_mode,
            **kwargs,
        )
        self.before = before
        self.after = after
        self.history = []
        self.is_start = False
        self.last_msg = "NaH"

    def initiate_chat(
        self,
        recipient,
        history: List[Dict] = None,
        silent: Optional[bool] = False,
        max_turns = 3,
        **kwargs
    ):
        self.is_start = True
        self._prepare_chat(recipient, True)
        for msg in history:
            self._append_oai_message(
                message=msg['content'],
                role=msg['role'],
                conversation_id = recipient
            )
        print(len(self.chat_messages_for_summary(recipient)))
        error_message = None
        try:
            prompt = self.PROMPTS + kwargs['message']
            self.send(prompt, recipient, silent=silent)
        except BadRequestError as e:
            error_message = str(e)
            print("error information: {}".format(error_message))
        recipient.reset()
        self.is_start = False
        print(self.chat_messages_for_summary(recipient))
        return self.chat_messages_for_summary(recipient)[-1]['content']

    def receive(
        self,
        message: Union[Dict, str],
        sender: Agent,
        request_reply: Optional[bool] = None,
        silent: Optional[bool] = False,
    ):
        self._process_received_message(message, sender, silent)
        if request_reply is False or request_reply is None and self.reply_at_receive[sender] is False:
            return
        if self.is_start:
            # reply = self.generate_reply(messages=self.chat_messages[sender], sender=sender)
            # if reply is not None:
            #     self.send(reply, sender, silent=silent)
            return
        if (sender == self.before):
            reply = sender.chat_messages_for_summary(self)[0]['content']
            if len(sender.chat_messages_for_summary(self)) > 1:
                state, res = self.generate_oai_reply(messages=self.chat_messages[sender], sender=sender)
                if state:
                    reply = res
                # reply = self.generate_reply(messages=self.chat_messages[sender], sender=sender)
            else:
                print("Self.history: " + str(len(self.history)))
                for msg in self.history:
                    self._append_oai_message(
                        message=msg['content'],
                        role=msg['role'],
                        conversation_id = self.after
                    )
            if reply is not None:
                reply = reply + "\n\n" + self.TRIMMING_PROMPT
                self.last_msg = reply
                print(self.last_msg)
                self.send(reply, self.after, silent=silent, request_reply=True)
                if (self._oai_messages[self.after][-1]['content'].find('Note') != -1):
                    temp = self._oai_messages[self.after][-1]['content']
                    self._oai_messages[self.after][-1]['content'] = temp[:temp.find('Note')].strip()
                # print(self.chat_messages_for_summary(self.after))
                self.history = self.chat_messages_for_summary(self.after)
                self.send(self.chat_messages_for_summary(self.after)[-1]['content'], self.before, silent=silent)


In [13]:
LLM = autogen.AssistantAgent(
    name="LLM",
    system_message="You are a helpful assistant",
    code_execution_config=False,
    llm_config=llm_config,
)

Judge = JudgeProxyAgent(
    name="Judge",
    system_message="You are an advisor",
    code_execution_config=False,
)

PromptGenerator = PromptAssistant(
    before=Judge,
    after=LLM,
    name="PromptGenerator",
    system_message="You are a prompt engineer",
    human_input_mode = "NEVER",
    code_execution_config=False,
    llm_config=llm_config,
)

In [14]:
# result = Judge.initiate_chat(
#     recipient = PromptGenerator,
#     max_turns = 2,
#     message = phoMT_dev_envi[4]["question"],
#     # answer = phoMT_dev_envi[4]["answer"]
# )

In [15]:
# PromptGenerator.last_msg
# PromptGenerator.history
    # print(Judge.chat_messages(PromptGenerator)[x])

In [16]:
# PromptGenerator.chat_messages_for_summary(LLM)

In [17]:
# print(result)

In [18]:
# judge_result = "Tới thời điểm hiện nay, không có tin tức về người bị thương trong 46 Nhà xuất bản thuộc hai giáo hội trên đảo Abaco lớn."
# get_score(phoMT_dev_envi[4]["question"], result, phoMT_dev_envi[4]["answer"])

In [19]:
# result = PromptGenerator.initiate_chat(
#     recipient = LLM,
#     max_turns = 2,
#     message = phoMT_dev_envi[4]["question"],
#     history = PromptGenerator.history,
#     clear_history = False
# )

In [20]:
def score_translate(message, answer, turns = 1, history = None):
    if history is None:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            answer = answer,
        )
    else:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            answer = answer,
            history = history
        )
    return [get_score(message, str(result), answer) / 100.0, get_score_comet(message, str(result), answer)]

def score_translate_test(message, answer, turns = 1, history = None, context = None):
    if history is None:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            previous = context
        )
    else:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            history = history,
            previous = context
        )
    # result = PromptGenerator.initiate_chat(
    #     recipient = LLM,
    #     max_turns = turns + 1,
    #     message = message,
    #     history = history,
    #     clear_history = False
    # )
    # return result
    return [get_score(message, str(result), answer) / 100.0, get_score_comet(message, str(result), answer)]

def score_translate_comet(message, answer, turns = 1, history = None):
    if history is None:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            answer = answer,
        )
    else:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            answer = answer,
            history = history
        )
    return get_score_comet(message, str(result), answer)

def score_translate_comet_test(message, answer, turns = 1, history = None, context = None):
    if history is None:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            previous = context
        )
    else:
        result = Judge.initiate_chat(
            recipient = PromptGenerator,
            max_turns = turns + 1,
            message = message,
            history = history,
            previous = context
        )
    return get_score_comet(message, str(result), answer)

## Improve

In [21]:
len(phoMT_test_envi)

19152

In [22]:
train_num = 1
test_num = 1

### Before:

In [23]:
# score_dev = []
# for i in range(10):
#     x = phoMT_dev_envi[i]
#     score_dev.append(score_translate(x['question'], x['answer'], 0))
score_test = []
for i in range(test_num):
    x = phoMT_test_envi[train_num + i]
    score_test.append(score_translate(
        message= x['question'],
        answer= x['answer'],
        turns= 0
    ))

Recipient.history: 0
[33mJudge[0m (to PromptGenerator):

Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task.
    The text:
    Severe storms ripped through parts of the southern and midwestern United States on January 11 and 12, 2020.

--------------------------------------------------------------------------------
Self.history: 0
Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include any

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mLLM[0m (to PromptGenerator):

Cơn bão mạnh đã qua các khu vực miền nam và trung tâm Hoa Kỳ vào ngày 11-12 tháng một năm 2020.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Cơn bão mạnh đã qua các khu vực miền nam và trung tâm Hoa Kỳ vào ngày 11-12 tháng một năm 2020.

--------------------------------------------------------------------------------


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:16<00:00, 16.27s/it]


In [24]:
# round(numpy.average(score_test), 2)
round(numpy.average([x[0] for x in score_test]), 2)

0.07

In [25]:
round(numpy.average([x[1] for x in score_test]), 2)

0.97

In [26]:
score_test

[[0.07466558730061357, 0.9703085422515869]]

In [27]:
# score_test = []
# for i in range(test_num):
#     x = phoMT_test_envi[train_num + i]
#     score_test.append(score_translate_comet(
#         message= x['question'],
#         answer= x['answer'],
#         turns= 0
#     ))

In [28]:
# round(numpy.average(score_test), 2)

In [29]:
# score_test

In [30]:
len(phoMT_train_envi)

2978000

### Learning

In [31]:
EPOCH = 1
turns = 1
# optimizer_model = "gpt-4-1106-preview"
# optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config, optimizer_model="llama3")
history_recorder = []
last_msg_s = []
for index in range(train_num):
    query = phoMT_test_envi[index]
    # is_correct = user_proxy.initiate_chat(assistant, answer=query["answer"], problem=query["question"])
    result = score_translate(query['question'], query['answer'], turns)
    history = PromptGenerator.history
    print(f"Test: {index}")
    history_recorder.extend(history)
    last_msg_s.append(PromptGenerator.last_msg)
    print(len(history_recorder))

Recipient.history: 0
[33mJudge[0m (to PromptGenerator):

Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task.
    The text:
    Brother Albert Barnett and his wife, Sister Susan Barnett, from the West Congregation in Tuscaloosa, Alabama

--------------------------------------------------------------------------------
Self.history: 0
Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include an

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mLLM[0m (to PromptGenerator):

Anh em Albert Barnett và vợ anh, Chị Susan Barnett, từ Lãnh hội Tây tại Tuscaloosa, Alabama.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Anh em Albert Barnett và vợ anh, Chị Susan Barnett, từ Lãnh hội Tây tại Tuscaloosa, Alabama.

--------------------------------------------------------------------------------
[33mJudge[0m (to PromptGenerator):

Analyze the original sentence: Brother Albert Barnett and his wife, Sister Susan Barnett, from the West Congregation in Tuscaloosa, Alabama, the expected Vietnamese translation: Anh Albert Barnett và chị Susan Barnett, thuộc hội thánh West ở Tuscaloosa, Alabama, and the generated translation: Anh em Albert Barnett và vợ anh, Chị Susan Barnett, từ Lãnh hội Tây tại Tuscaloosa, Alabama.. Identify the differences between Anh em Albert Barnett và vợ anh, Chị Susan Barnett, từ Lãnh hội Tây tại Tuscaloosa, Alabama. and Anh Albert Barnett v

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


Analysis of the differences:

1. Anh em Albert Barnett và vợ anh, Chị Susan Barnett: The generated translation uses "anh em" (brothers) to translate "Brother", whereas a more accurate translation would use "Anh" (older brother or Mr.) for a masculine title and "Chị" (older sister or Mrs.) for a feminine title. This mismatch affects the tone and style of the translation.

2. từ Lãnh hội Tây: The generated translation uses the phrase "từ Lãnh hội Tây", which doesn't convey the correct meaning. The phrase should be " thuộc hội thánh West" to correctly translate "from the West Congregation".

3. tại Tuscaloosa, Alabama: This part is accurately translated, conveying the location information.

To improve the translation and align it with Anh Albert Barnett và chị Susan Barnett, thuộc hội thánh West ở Tuscaloosa, Alabama:

* Use the correct translations for masculine ("Anh") and feminine titles ("Chị") to maintain tone and style consistency.
* Replace "từ Lãnh hội Tây" with "thuộc hội thánh W

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mLLM[0m (to PromptGenerator):

Anh Albert Barnett và chị Susan Barnett, thuộc hội thánh West ở Tuscaloosa, Alabama.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Anh Albert Barnett và chị Susan Barnett, thuộc hội thánh West ở Tuscaloosa, Alabama.

--------------------------------------------------------------------------------


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.01s/it]


Test: 0
4


In [32]:
len(history_recorder)

4

In [33]:
# Save the history to .json file for future usage
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(history_recorder, f, ensure_ascii=False, indent=4)

### Testing

In [34]:
# Load the history from data.json file
with open('data.json', 'r') as file:
    history_recorder = json.load(file)

In [35]:
score_test = []
answers = []
for i in range(test_num):
    x = phoMT_test_envi[train_num+i]
    print("Test " + str(i) + ":")
    context = " ".join(answers)
    # if (len(answers)>5) context = answers[-5:].join(" ")
    score_test.append(score_translate_test(
        message = x['question'],
        answer = x['answer'],
        turns = 0,
        history = history_recorder,
        context = context
    ))

Test 0:
Recipient.history: 4
[33mJudge[0m (to PromptGenerator):

Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task.
    The text:
    Severe storms ripped through parts of the southern and midwestern United States on January 11 and 12, 2020.

--------------------------------------------------------------------------------
Self.history: 4
Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not inc

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mLLM[0m (to PromptGenerator):

Cơn bão nghiêm trọng đã qua các khu vực miền nam và trung tâm Hoa Kỳ vào ngày 11-12 tháng một năm 2020.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Cơn bão nghiêm trọng đã qua các khu vực miền nam và trung tâm Hoa Kỳ vào ngày 11-12 tháng một năm 2020.

--------------------------------------------------------------------------------


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.80s/it]


In [36]:
# round(numpy.average(score_test), 2)
round(numpy.average([x[0] for x in score_test]), 2)

0.07

In [37]:
round(numpy.average([x[1] for x in score_test]), 2)

0.97

In [38]:
# score_test

In [39]:
score_test = []
answers = []
for i in range(test_num):
    x = phoMT_test_envi[train_num+i]
    print("Test " + str(i) + ":")
    context = " ".join(answers)
    # if (len(answers)>5) context = answers[-5:].join(" ")
    score_test.append(score_translate_test(
        message = x['question'],
        answer = x['answer'],
        turns = 1,
        history = history_recorder,
        context = context
    ))

Test 0:
Recipient.history: 4
[33mJudge[0m (to PromptGenerator):

Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not include any explanations, introductions, follow-up remarks, or additional feedback. Provide exactly and only what is specified in the task.
    The text:
    Severe storms ripped through parts of the southern and midwestern United States on January 11 and 12, 2020.

--------------------------------------------------------------------------------
Self.history: 4
Translate a sentence from English to Vietnamese. Produce an accurate, context-sensitive translation that maintains the tone and meaning of the original sentence. Ensure that the output sounds natural for native Vietnamese speakers.
    Respond only with the requested output. Do not inc

HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mLLM[0m (to PromptGenerator):

Cơn bão mạnh đã qua các khu vực miền nam và trung tâm Hoa Kỳ vào ngày 11-12 tháng một năm 2020.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Cơn bão mạnh đã qua các khu vực miền nam và trung tâm Hoa Kỳ vào ngày 11-12 tháng một năm 2020.

--------------------------------------------------------------------------------
[33mJudge[0m (to PromptGenerator):

Translate the sentence: Severe storms ripped through parts of the southern and midwestern United States on January 11 and 12, 2020. into Vietnamese, ensuring that when the translation is appended to the previous translated paragraph: , the entire paragraph remains coherent, meaningful, and contextually appropriate. Respond with only the translated sentence, without any additional commentary or explanation.

--------------------------------------------------------------------------------


HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


Cơn bão nghiêm trọng đã qua các khu vực phía nam và trung tâm Hoa Kỳ vào ngày 11 và 12 tháng một năm 2020.

Respond with only the improved translation, without any additional explanations or commentary.
[33mPromptGenerator[0m (to LLM):

Cơn bão nghiêm trọng đã qua các khu vực phía nam và trung tâm Hoa Kỳ vào ngày 11 và 12 tháng một năm 2020.

Respond with only the improved translation, without any additional explanations or commentary.

--------------------------------------------------------------------------------


HTTP Request: POST http://localhost:11434/v1/chat/completions "HTTP/1.1 200 OK"




Model llama3 is not found. The cost will be 0. In your config_list, add field {"price" : [prompt_price_per_1k, completion_token_price_per_1k]} for customized pricing.


[33mLLM[0m (to PromptGenerator):

Cơn bão nghiêm trọng đã qua các khu vực phía nam và trung tâm Hoa Kỳ vào ngày 11 và 12 tháng một năm 2020.

--------------------------------------------------------------------------------
[33mPromptGenerator[0m (to Judge):

Cơn bão nghiêm trọng đã qua các khu vực phía nam và trung tâm Hoa Kỳ vào ngày 11 và 12 tháng một năm 2020.

--------------------------------------------------------------------------------


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.37s/it]


In [40]:
# round(numpy.average(score_test), 2)
round(numpy.average([x[0] for x in score_test]), 2)

0.08

In [41]:
round(numpy.average([x[1] for x in score_test]), 2)

0.96

In [42]:
# score_test

In [43]:
# score_test = []
# answers = []
# for i in range(test_num):
#     x = phoMT_test_envi[train_num+i]
#     print("Test " + str(i) + ":")
#     context = " ".join(answers)
#     # if (len(answers)>5) context = answers[-5:].join(" ")
#     score_test.append(score_translate_comet_test(
#         message = x['question'],
#         answer = x['answer'],
#         turns = 0,
#         history = history_recorder,
#         context = context
#     ))

In [44]:
# round(numpy.average(score_test), 2)

In [45]:
# score_test

In [46]:
# score_test = []
# answers = []
# for i in range(test_num):
#     x = phoMT_test_envi[train_num+i]
#     print("Test " + str(i) + ":")
#     context = " ".join(answers)
#     # if (len(answers)>5) context = answers[-5:].join(" ")
#     score_test.append(score_translate_comet_test(
#         message = x['question'],
#         answer = x['answer'],
#         turns = 1,
#         history = history_recorder,
#         context = context
#     ))

In [47]:
# round(numpy.average(score_test), 2)

In [48]:
# score_test