# Evaluating English-to-Hungarian translation with BLEU

In [5]:
from openai import AzureOpenAI
import os

# Shared Azure OpenAI client; the API key and endpoint are read from the environment.
azureClient = AzureOpenAI(
    api_version="2023-05-15",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)
def translate(english):
    """Translate one English sentence to Hungarian with the Azure OpenAI completions API.

    The prompt is a three-example few-shot transcript followed by the sentence to
    translate. Decoding uses temperature 0.

    NOTE(review): a later cell in this notebook defines another `translate`; when
    the cells run top to bottom that later definition replaces this one.

    Args:
        english: The English sentence to translate.

    Returns:
        The first line of the completion with surrounding whitespace removed, or
        "" if the request fails for any reason. The evaluation loop counts an
        empty result as an error, so failures are reported rather than raised.
    """
    import sys  # local import: only needed to report failures

    try:
        response = azureClient.completions.create(
            model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),  # Azure deployment to query
            prompt=f"""<|system|>Translate the user provided English sentence to Hungarian.
<|user|>What time is it?
<|assistant|>Mennyi az idő?
<|user|>The surface is the darkest among Uranian moons, and appears to have been shaped primarily by impacts.
<|assistant|>A felszíne a legsötétebb az uránuszi holdak közül, és úgy tűnik, leginkább becsapódások alakították.
<|user|>He created numerous programs to provide relief to the unemployed and farmers while seeking economic recovery with the National Recovery Administration and other programs.
<|assistant|>Számos programot hozott létre a munkanélküliek és gazdálkodók megsegítésére, miközben az Országos Helyreállítási Igazgatósággal és más programokkal kereste a gazdasági fellendülést.
<|user|>{english}
<|assistant|>
""",
            # A fixed budget of 10 tokens cut off nearly every sentence-length
            # translation; scale the budget with the input length instead.
            max_tokens=max(32, len(english) * 2),
            temperature=0,
            # Stop before the model starts inventing the next "<|user|>" turn.
            stop=["<|"],
        )

        text = response.choices[0].text.strip()
        # Keep only the first line in case the model continued past the answer.
        return text.split("\n", 1)[0].strip()
    except Exception as e:
        # Best effort: report the cause, then signal failure with an empty string.
        print(f"translate failed for {english!r}: {e!r}", file=sys.stderr)
        return ""


In [1]:
import requests

def translate(english):
    """Translate one English sentence to Hungarian using the local completion server.

    Sends a three-example few-shot prompt to `http://localhost:5001/api/v1/completion`
    and reads the text of the first returned choice.

    Args:
        english: The English sentence to translate.

    Returns:
        The completion text with every stop sequence and surrounding whitespace
        removed, or "" when the request fails or the response lacks the expected
        `choices[0].text` field. The evaluation loop counts an empty result as
        an error, so one bad response does not abort the whole run.
    """
    import sys  # local import: only needed to report failures

    api_url = "http://localhost:5001/api/v1"
    stop_words = ["###", "</s>", "<|", "\n"]
    # Generous bound so a stalled server cannot hang the evaluation indefinitely.
    request_timeout = 300  # seconds
    headers = {
        "Content-Type": "application/json"
    }

    data = {
        "prompt": f"""<|system|>Translate the user provided English sentence to Hungarian.
<|user|>What time is it?
<|assistant|>Mennyi az idő?
<|user|>The surface is the darkest among Uranian moons, and appears to have been shaped primarily by impacts.
<|assistant|>A felszíne a legsötétebb az uránuszi holdak közül, és úgy tűnik, leginkább becsapódások alakították.
<|user|>He created numerous programs to provide relief to the unemployed and farmers while seeking economic recovery with the National Recovery Administration and other programs.
<|assistant|>Számos programot hozott létre a munkanélküliek és gazdálkodók megsegítésére, miközben az Országos Helyreállítási Igazgatósággal és más programokkal kereste a gazdasági fellendülést.
<|user|>{english}
<|assistant|>
""",
        "max_tokens": len(english)*2,
        "temperature": 0,
        "top_p": 1.0,
        # NOTE(review): only choices[0] is read below. If "n" is the number of
        # completions requested, the other 19 are discarded - confirm against
        # the server's API.
        "n": 20,
        "stop": stop_words
    }

    try:
        response = requests.post(
            f"{api_url}/completion",
            headers=headers,
            json=data,
            timeout=request_timeout,
        )
        # Surface HTTP errors here instead of as a confusing KeyError below.
        response.raise_for_status()
        result = response.json()["choices"][0]["text"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        print(f"translate failed for {english!r}: {e!r}", file=sys.stderr)
        return ""

    # Remove any stop sequence that is still present in the returned text.
    for sw in stop_words:
        result = result.replace(sw, "")
    return result.strip()

In [2]:
import subprocess
def SpellCheckHU(sentence):
    """Count the words in `sentence` that hunspell flags as misspelled.

    Runs `hunspell -d hu-HU -l`, feeding the sentence on standard input, and
    counts the non-empty lines it prints.

    Args:
        sentence: Hungarian text to check.

    Returns:
        The number of non-empty output lines, i.e. flagged words; 0 means no
        spelling errors were reported.

    Raises:
        RuntimeError: if hunspell exits with a non-zero status. Otherwise a
            broken setup would produce empty output and be mistaken for
            "no spelling errors".
        FileNotFoundError: if the `hunspell` executable cannot be found.
    """
    # Argument list instead of shell=True: the command is fixed, so no shell is needed.
    result = subprocess.run(
        ["hunspell", "-d", "hu-HU", "-l"],
        input=sentence,
        capture_output=True,
        text=True,
        encoding='utf-8',
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"hunspell exited with status {result.returncode}: {result.stderr.strip()}"
        )
    # Each non-empty output line is counted as one flagged word.
    return sum(1 for line in result.stdout.splitlines() if line.strip())

In [3]:
def WordCount(sentence):
    """Return how many whitespace-separated words `sentence` contains."""
    words = sentence.split()
    return sum(1 for _ in words)

In [4]:
from datasets import load_dataset
import evaluate

# Score the currently defined `translate` on the Hunglish set: corpus BLEU plus
# the average share of words per translation that the spell checker flags.
dataset = load_dataset('json', data_files="hunglish-BLEU.json")
metric = evaluate.load('bleu')

# Read both columns once, outside the loop. Indexing dataset['train'][column]
# on every iteration may re-read the whole column, depending on the installed
# `datasets` version.
train_split = dataset['train']
english_sentences = train_split["english"]
hungarian_sentences = train_split["hungarian"]

predictions = []
references = []
spell_error_rates = []
cnt = 0
errors = 0
for eng, reference in zip(english_sentences, hungarian_sentences):
    cnt += 1
    hun = translate(eng)
    word_count = WordCount(hun)
    if word_count > 0:
        predictions.append(hun)
        references.append([reference])
        # Share of the translation's words that the spell checker flagged.
        spell_error_rates.append(SpellCheckHU(hun) / word_count)
        print(".", end="")
    else:
        # An empty translation counts as a failure and is left out of the metrics.
        print("x", end="")
        errors += 1

print()
print(f"Statistics: {errors}/{cnt} (errors/count)")
if predictions:
    results = metric.compute(predictions=predictions, references=references)
    avgerror = sum(spell_error_rates) / len(spell_error_rates)
    print(f"Average spell errors: {avgerror * 100: 5.1f}%")
    print(results)
else:
    # Every translation failed: the average would divide by zero, and there is
    # nothing to score.
    results = None
    avgerror = None
    print("No usable translations were produced; BLEU and spell-error statistics are unavailable.")


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................