In [1]:
# MQM Categories
# 0: "Accuracy",
# 1: "Fluency",
# 2: "Locale convention",
# 3: "No-error",
# 4: "Other",
# 5: "Terminology",

In [2]:
import sys

sys.path.append("../")

from gemba.CREDENTIALS import credentials
from gemba.prompt import prompts, language_codes
from gemba.gpt_api import GptApi
from gemba.cache import Cache

# API client used by every helper function in this notebook; verbose output is off.
gptapi = GptApi(credentials, verbose=False)

# Model name and prompt key that get_translation_quality reads as globals at call time.
use_model = "GPT-4"
annotation = "GEMBA-DA"
# Cache handle; its file name is built from the model name and the prompt key.
cache = Cache(f"{use_model}_{annotation}.jsonl")

In [3]:
def get_translation_quality(src, hyp, src_lng="en", trg_lng="de", ref=None):
    """Score a translation with the prompt selected by the module-level ``annotation``.

    The model name (``use_model``), prompt key (``annotation``) and ``cache``
    are read from module scope at call time.

    Args:
        src: Source segment, substituted for ``source_seg`` in the prompt.
        hyp: Translation hypothesis, substituted for ``target_seg``.
        src_lng: Value substituted for ``source_lang``.
        trg_lng: Value substituted for ``target_lang``.
        ref: Reference translation, substituted for ``reference_seg``. Must be
            provided when the selected prompt has ``use_ref`` set.

    Returns:
        The ``"answer"`` field of the first parsed answer from ``gptapi.request``.

    Raises:
        ValueError: If the selected prompt requires a reference and ``ref`` is None.
    """
    prompt_spec = prompts[annotation]
    if prompt_spec["use_ref"] and ref is None:
        raise ValueError("Reference is required for this metric")

    # NOTE(review): `language_codes` is imported in this notebook but never used;
    # confirm whether the prompt expects the raw codes passed here or a mapped
    # language name.
    data = {
        "source_seg": src,
        "target_seg": hyp,
        "reference_seg": ref,
        "source_lang": src_lng,
        "target_lang": trg_lng,
    }
    prompt = prompt_spec["prompt"].format(**data)
    parsed_answers = gptapi.request(
        prompt, use_model, prompt_spec["validate_answer"], cache=cache
    )
    return parsed_answers[0]["answer"]

In [4]:
# Example call: score an English sentence and its German translation using the default language arguments.
get_translation_quality("This is a test", "Dies ist ein Test")

100

## MQM

In [5]:
from gemba.gemba_mqm_utils import (
    TEMPLATE_GEMBA_MQM_ENDE,
    TEMPLATE_GEMBA_MQM,
    apply_template,
    parse_mqm_answer,
)
from collections import defaultdict


In [6]:
# Rebind the module-level model name and cache for the MQM section.
# NOTE(review): get_translation_quality and select_best read these globals at
# call time, so once this cell has run they use the GEMBA-MQM cache file.
use_model = "GPT-4"
cache = Cache(f"{use_model}_GEMBA-MQM.jsonl")

In [7]:


def get_mqm_erros(src, hyp, src_lng="en", trg_lng="de", ref=None):
    """Run the GEMBA-MQM template on a source/hypothesis pair and list the errors found.

    A function-local model name and cache are used, so the call does not
    depend on the module-level ``use_model``/``cache``. ``ref`` is accepted
    but not used.

    Args:
        src: Source segment (``source_seg`` in the template).
        hyp: Translation hypothesis (``target_seg`` in the template).
        src_lng: Value for ``source_lang``.
        trg_lng: Value for ``target_lang``.
        ref: Unused.

    Returns:
        A single list: the parsed "minor" entries, then "major", then
        "critical"; a severity missing from the parsed answer contributes
        nothing.
    """
    use_model = "GPT-4"
    cache = Cache(f"{use_model}_GEMBA-MQM.jsonl")

    template_fields = dict(
        source_seg=src,
        target_seg=hyp,
        source_lang=src_lng,
        target_lang=trg_lng,
    )
    mqm_prompt = apply_template(TEMPLATE_GEMBA_MQM, template_fields)

    def _parse(raw_answer):
        # Ask the parser for the list form with short descriptions.
        return parse_mqm_answer(raw_answer, list_mqm_errors=True, full_desc=False)

    responses = gptapi.request(mqm_prompt, use_model, _parse, cache=cache)

    # Copy into a plain dict so that absent severities can default to an empty list.
    by_severity = dict(responses[0]["answer"])
    return (
        by_severity.get("minor", [])
        + by_severity.get("major", [])
        + by_severity.get("critical", [])
    )

In [8]:
# The source sentence contains literal double quotes. Those parts are written as
# single-quoted literals: adjacent "..." literals are concatenated by Python,
# which silently dropped the quotation marks (and left a double space) before.
src = (
    "According to the British government's "
    '"reasonable worst-case"'
    " scenario, the flow of goods could be cut by half on Day One of a no-deal Brexit and could take a year to recover. It said time was "
    '"extremely limited"'
    " if the shipping issues were to be resolved by the end of October."
)
hyp = "Laut dem „vernünftigen Worst-Case“ -Szenario der britischen Regierung könnte der Warenfluss am ersten Tag eines No-Deal-Brexits um die Hälfte reduziert werden und könnte ein Jahr dauern, um sich zu erholen. Es hieß, die Zeit sei „extrem begrenzt“, wenn die Schifffahrtsprobleme bis Ende Oktober gelöst werden sollten."
src_lng = "en"
trg_lng = "de"

get_mqm_erros(src, hyp, src_lng, trg_lng)

['fluency']

## Suggest postedit

In [9]:


def get_postedit(src, hyp, src_lng="en", trg_lng="de", ref=None):
    """Run the ``POSTEDIT`` prompt for a source/hypothesis pair.

    A function-local model name, prompt key and cache are used. ``ref`` is
    accepted but not used.

    Args:
        src: Source segment (``source_seg`` in the prompt).
        hyp: Translation hypothesis (``target_seg`` in the prompt).
        src_lng: Value for ``source_lang``.
        trg_lng: Value for ``target_lang``.
        ref: Unused.

    Returns:
        The ``"answer"`` field of the first parsed answer from ``gptapi.request``.
    """
    use_model = "GPT-4"
    annotation = "POSTEDIT"
    cache = Cache(f"{use_model}_{annotation}.jsonl")

    postedit_spec = prompts[annotation]
    filled_prompt = postedit_spec["prompt"].format(
        source_seg=src,
        target_seg=hyp,
        source_lang=src_lng,
        target_lang=trg_lng,
    )
    responses = gptapi.request(
        filled_prompt, use_model, postedit_spec["validate_answer"], cache=cache
    )
    first_response = responses[0]
    return first_response["answer"]

Saving cache GPT-4_GEMBA-MQM.jsonl


In [10]:
# The source sentence contains literal double quotes. Those parts are written as
# single-quoted literals: adjacent "..." literals are concatenated by Python,
# which silently dropped the quotation marks (and left a double space) before.
src = (
    "According to the British government's "
    '"reasonable worst-case"'
    " scenario, the flow of goods could be cut by half on Day One of a no-deal Brexit and could take a year to recover. It said time was "
    '"extremely limited"'
    " if the shipping issues were to be resolved by the end of October."
)
hyp = "Laut dem „vernünftigen Worst-Case“ -Szenario der britischen Regierung könnte der Warenfluss am ersten Tag eines No-Deal-Brexits um die Hälfte reduziert werden und könnte ein Jahr dauern, um sich zu erholen. Es hieß, die Zeit sei „extrem begrenzt“, wenn die Schifffahrtsprobleme bis Ende Oktober gelöst werden sollten."
src_lng = "en"
trg_lng = "de"

get_postedit(src, hyp, src_lng, trg_lng)


'"Laut dem „vernünftigen Worst-Case“-Szenario der britischen Regierung könnte der Warenfluss am ersten Tag eines No-Deal-Brexits um die Hälfte reduziert werden und es könnte ein Jahr dauern, bis er sich erholt. Es wurde gesagt, dass die Zeit extrem begrenzt sei, wenn die Probleme mit dem Warenverkehr bis Ende Oktober gelöst werden sollten."'

## Find the best translation

In [11]:
from gemba.prompt import get_best_translation_propmt
import pandas as pd


# Read the CSV and keep only the rows whose seg_id equals 6.
# NOTE(review): the path is absolute and machine-specific.
csv_path = "/home/ubuntu/repos/Efficient-MT/data/wmt_2020_all_reduced_system_class_updated.csv"
df = pd.read_csv(csv_path)
is_selected_segment = df["seg_id"] == 6
df_seg = df.loc[is_selected_segment]

In [12]:
# Display the rows selected for segment 6.
df_seg

Unnamed: 0,seg_id,system,sample_id,source,target,category,severity,source_topic,target_topic,Num token,...,Num PronType=Tot,Num Style=Arch,Num Style=Coll,"COMET_QE, cased, punctuated","COMET_QE, cased, not punctuated","COMET_QE, uncased, punctuated","COMET_QE, uncased, not punctuated",CLSSS,topic_distance,labse_distance
10,6,MT A,6_6,He knew how to manipulate the media. He knew e...,"„Er wusste, wie er die Medien manipulieren kon...","['Other', 'Accuracy', 'Fluency']",Edit,Politics & Government,Unknown,4.6,...,0.0,0.0,0.0,0.22834,0.00456,2e-05,0.0,83.698,0.102242,0.11806
11,6,MT B,6_6,He knew how to manipulate the media. He knew e...,"„Er wusste, wie man die Medien manipuliert. Er...","['No-error', 'Accuracy', 'Terminology']",Edit,Politics & Government,Unknown,1.0,...,0.0,0.0,0.0,0.2735,0.0,0.0004,0.0,84.78,0.108665,0.10128
12,6,MT C,6_6,He knew how to manipulate the media. He knew e...,Der ein Jahrzehnt als Leibwächter Jacksons tät...,"['Other', 'Terminology', 'Fluency']",Edit,Politics & Government,Politics & Government,2.0,...,0.0,0.0,0.0,0.3475,0.3007,0.0564,0.0,81.51,0.074739,0.15879
13,6,MT D,6_6,He knew how to manipulate the media. He knew e...,"„Er wusste, wie man die Medien manipuliert. Er...","['Other', 'Accuracy', 'Terminology', 'Fluency']",Edit,Politics & Government,Entertainment & Music,-3.0,...,0.0,0.0,0.0,0.1063,0.0,0.0022,0.0,85.7,0.15178,0.095097
14,6,MT E,6_6,He knew how to manipulate the media. He knew e...,"„Er wusste, wie man die Medien manipuliert. Er...","['Other', 'Accuracy', 'Terminology', 'Fluency']",Edit,Politics & Government,Entertainment & Music,-3.0,...,0.0,0.0,0.0,0.1829,0.0,0.0002,0.0,84.56,0.127199,0.100404
15,6,MT F,6_6,He knew how to manipulate the media. He knew e...,"""Er wusste, wie man die Medien manipuliert. Er...","['Other', 'Accuracy', 'Fluency']",Edit,Politics & Government,Entertainment & Music,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.120956,0.108549
16,6,MT G,6_6,He knew how to manipulate the media. He knew e...,"""Er wusste, wie man die Medien manipuliert. Er...","['Other', 'Accuracy', 'Terminology', 'Fluency']",Edit,Politics & Government,Entertainment & Music,-4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.131539,0.105066
17,6,MT H,6_6,He knew how to manipulate the media. He knew e...,"„Er wusste, wie man die Medien manipuliert. Er...","['Other', 'Accuracy', 'Terminology', 'Fluency']",Edit,Politics & Government,Entertainment & Music,-4.0,...,0.0,0.0,0.0,0.183,0.0,0.0002,0.0,84.27,0.125635,0.101808
18,6,MT I,6_6,He knew how to manipulate the media. He knew e...,"""Er wusste, wie man die Medien manipuliert. Er...","['Other', 'Accuracy', 'Fluency']",Edit,Politics & Government,Entertainment & Music,-4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.131888,0.11347
19,6,MT J,6_6,He knew how to manipulate the media. He knew e...,"„Er wusste, wie man die Medien manipuliert. Er...","['Other', 'Accuracy', 'Terminology', 'Fluency']",Edit,Politics & Government,Entertainment & Music,-1.0,...,0.0,0.0,0.0,0.0676,0.0,0.0,0.0,85.265,0.137343,0.101926


In [13]:
# Inputs for the best-translation query: the source text of the first selected
# row and the full list of candidate translations from the same rows.
src_lng, trg_lng = "en", "de"
src = df_seg["source"].iloc[0]
mts = df_seg["target"].tolist()



In [14]:
def select_best(src, mts, src_lng="en", trg_lng="de"):
    """Send the best-translation prompt for ``mts`` and return the parsed answer.

    The reply is validated by converting it with ``int``. The model name
    (``use_model``) and ``cache`` are read from module scope at call time, so
    the cache file used depends on which cell last assigned ``cache``.

    Args:
        src: Source segment.
        mts: Candidate translations passed to the prompt builder.
        src_lng: Source language argument for the prompt builder.
        trg_lng: Target language argument for the prompt builder.

    Returns:
        The ``"answer"`` field of the first parsed answer from ``gptapi.request``.
    """
    selection_prompt = get_best_translation_propmt(src, mts, src_lng, trg_lng)
    responses = gptapi.request(selection_prompt, use_model, int, cache=cache)
    first_response = responses[0]
    return first_response["answer"]

In [15]:
# Ask the model to pick among the candidate translations collected for the selected segment.
select_best(src, mts, src_lng, trg_lng)

1