In [2]:
import json
import glob
import tqdm
import os
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate

from framework import get_language_name

In [3]:
prompt = PromptTemplate(
    input_variables=["text", "language"],
    template='''Translate the following sentence to {language}.
    Text: {text}
    Only output the translated text.
    Do not include any additional text or explanations.'''
)

In [4]:
model_name = "gemma3_1b"
llm = OllamaLLM(model="gemma3:1b")

In [5]:
chain = prompt | llm

In [6]:
input_data_folder = "./data/semeval.validation.v2-889a1492ba6c3791baa8f4224bc8e685/validation"
jsonl_files = glob.glob(f"{input_data_folder}/*.jsonl")

output_prediction_dir = os.path.join("data/predictions", model_name, "validation")
os.makedirs(output_prediction_dir, exist_ok=True)

In [6]:
results = []
for file_path in jsonl_files:
    filename = os.path.basename(file_path)
    outfile_path = os.path.join(output_prediction_dir, filename)

    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            data.append(json.loads(line))

    results = []
    for idx, record in enumerate(data, 1):
        id = record['id']
        source = record['source']
        source_locale = record['source_locale']
        source_language = get_language_name(source_locale)
        target_locale = record['target_locale']
        target_language = get_language_name(target_locale)
        result = chain.invoke({"text": source, "language": target_language})
        """         
        result = result.replace("```json", "").replace("```", "").strip()
        try:
            result = json.loads(result)
        except json.JSONDecodeError:
            print(f"Error decoding JSON for source: {source}, result: {result}")
            continue 
        """
        model_translation = result.strip()
        results.append({
            "id": id,
            "source_language": source_language,
            "target_language": target_language,
            "text": source,
            "prediction": model_translation,
        })

        if 'pbar' not in locals():
            pbar = tqdm.tqdm(total=len(data))

        pbar.update(1)

        if idx % 10 == 0 or idx == len(data):
            with open(outfile_path, 'w', encoding='utf-8') as f:
                for res in results:
                    f.write(json.dumps(res, ensure_ascii=False) + '\n')

    print(f"Translations saved to {outfile_path}")

if 'pbar' in locals():
    pbar.close()

HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  0%|          | 0/722 [00:00<?, ?it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  0%|          | 2/722 [00:00<01:38,  7.28it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  0%|          | 3/722 [00:00<02:23,  5.00it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  1%|          | 4/722 [00:00<02:13,  5.38it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  1%|          | 5/722 [00:01<02:53,  4.14it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  1%|          | 6/722 [00:01<03:04,  3.89it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  1%|          | 7/722 [00:01<03:17,  3.63it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
  1%|          | 8/722 [00:01<02:48,  4.25it/s]HTTP Request: POST http://127.0.0.1:11434/api/ge

Translations saved to data/predictions\gemma3_1b\validation\ar_AE.jsonl


723it [38:31,  3.43it/s]                         HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
724it [38:31,  3.42it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
725it [38:31,  3.28it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
726it [38:31,  3.58it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
727it [38:32,  3.87it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
728it [38:32,  3.98it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
729it [38:32,  3.59it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
730it [38:32,  3.73it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
731it [38:33,  3.95it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
732it [38:33,  3.91it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
733it

Translations saved to data/predictions\gemma3_1b\validation\de_DE.jsonl


1454it [41:45,  4.07it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1455it [41:45,  4.25it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1456it [41:46,  4.13it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1457it [41:46,  4.34it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1458it [41:46,  4.26it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1459it [41:46,  3.94it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1460it [41:47,  3.55it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1461it [41:47,  3.44it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1462it [41:47,  3.25it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1463it [41:48,  3.52it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
1464it [41:48,  4.00

Translations saved to data/predictions\gemma3_1b\validation\es_ES.jsonl


2193it [45:06,  3.39it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2194it [45:06,  3.40it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2195it [45:06,  3.33it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2196it [45:06,  3.76it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2197it [45:07,  4.11it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2198it [45:07,  3.65it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2199it [45:07,  3.39it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2200it [45:08,  3.40it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2201it [45:08,  3.62it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2202it [45:08,  3.89it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2203it [45:08,  3.64

Translations saved to data/predictions\gemma3_1b\validation\fr_FR.jsonl


2917it [48:27,  3.88it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2918it [48:27,  3.93it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2919it [48:28,  3.82it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2920it [48:28,  3.74it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2921it [48:28,  3.60it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2922it [48:29,  3.57it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2923it [48:29,  3.19it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2924it [48:29,  3.10it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2925it [48:30,  3.14it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2926it [48:30,  2.99it/s]HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
2927it [48:30,  3.24

Translations saved to data/predictions\gemma3_1b\validation\it_IT.jsonl





In [None]:
from framework import download_comet_model
comet_model = download_comet_model()

In [None]:
from framework import calculate_comet_scores, calculate_meta_score

scores_dir = os.path.join(output_prediction_dir, "scores")

if not os.path.exists(scores_dir):
    os.makedirs(scores_dir)

for file_path in jsonl_files:
    references_path = file_path
    filename = os.path.basename(file_path)
    predictions_path = os.path.join(output_prediction_dir, filename)

    comet_score = calculate_comet_scores(
        comet_model, 
        references_path, 
        predictions_path
    )

    correct_instances, total_instances, meta_score = calculate_meta_score(
        references_path,
        predictions_path)
    
    print("=============================================")
    print(f"Evaluation results for {predictions_path}")
    print(f"Correct instances: {correct_instances}, Total instances: {total_instances}")
    print(f"COMET scores: {comet_score}" )
    print(f"M-ETA score: {meta_score}")

print("Evaluation completed for all files.")


Loaded 722 instances.
Loaded 722 predictions.
Evaluation results for data/predictions\gemma3_1b\validation\ar_AE.jsonl
Correct instances: 32, Total instances: 722
M-ETA score: 4.43213296398892
Loaded 731 instances.
Loaded 731 predictions.
Evaluation results for data/predictions\gemma3_1b\validation\de_DE.jsonl
Correct instances: 88, Total instances: 731
M-ETA score: 12.038303693570452
Loaded 739 instances.
Loaded 739 predictions.
Evaluation results for data/predictions\gemma3_1b\validation\es_ES.jsonl
Correct instances: 155, Total instances: 739
M-ETA score: 20.974289580514206
Loaded 724 instances.
Loaded 724 predictions.
Evaluation results for data/predictions\gemma3_1b\validation\fr_FR.jsonl
Correct instances: 123, Total instances: 724
M-ETA score: 16.988950276243095
Loaded 730 instances.
Loaded 730 predictions.
Evaluation results for data/predictions\gemma3_1b\validation\it_IT.jsonl
Correct instances: 129, Total instances: 730
M-ETA score: 17.671232876712327
Evaluation completed for