### 1. Import necessary libraries

In [1]:
import json
import glob
import demjson3
import ast
import re
from ftfy import fix_text
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from comet.models import download_model, load_from_checkpoint
import os
from framework import extract_entity_translation, calculate_comet_scores, calculate_meta_score

  from .autonotebook import tqdm as notebook_tqdm


### 2. Examples for one-shot prompting

In [2]:
# One worked example per target locale, keyed by the two-letter locale code.
# translate_sentence_one_shot looks an entry up via record['target_locale'] and
# feeds 'exampleSentence', 'exampleEntities' and 'exampleTranslation' into the
# one-shot prompt. 'exampleTranslatedEntities' is not read by any cell visible
# in this notebook.
examples = {
    "ar": {
        "exampleSentence": "Where is the Burj Khalifa located?",
        "exampleEntities": ["Burj Khalifa"],
        "exampleTranslation": "أين يقع برج خليفة؟",
        "exampleTranslatedEntities": ["برج خليفة"]
    },
    "zh": {
        "exampleSentence": "When was the Great Wall of China built?",
        "exampleEntities": ["Great Wall of China"],
        "exampleTranslation": "中國長城是什麼時候建造的？",
        "exampleTranslatedEntities": ["中國長城"]
    },
    "fr": {
        "exampleSentence": "Who painted the Mona Lisa?",
        "exampleEntities": ["Mona Lisa"],
        "exampleTranslation": "Qui a peint la Joconde ?",
        "exampleTranslatedEntities": ["la Joconde"]
    },
    "de": {
        "exampleSentence": "Which river flows through Berlin?",
        "exampleEntities": ["Berlin"],
        "exampleTranslation": "Welcher Fluss fließt durch Berlin?",
        "exampleTranslatedEntities": ["Berlin"]
    },
    "it": {
        "exampleSentence": "Where is the Colosseum located?",
        "exampleEntities": ["Colosseum"],
        "exampleTranslation": "Dove si trova il Colosseo?",
        "exampleTranslatedEntities": ["Colosseo"]
    },
    "ja": {
        "exampleSentence": "Which city is Mount Fuji near?",
        "exampleEntities": ["Mount Fuji"],
        "exampleTranslation": "富士山はどの都市の近くにありますか？",
        "exampleTranslatedEntities": ["富士山"]
    },
    "ko": {
        "exampleSentence": "Who is the lead actor in Squid Game?",
        "exampleEntities": ["Squid Game"],
        "exampleTranslation": "오징어 게임의 주연 배우는 누구입니까?",
        "exampleTranslatedEntities": ["오징어 게임"]
    },
    "es": {
        "exampleSentence": "Where was Pablo Picasso born?",
        "exampleEntities": ["Pablo Picasso"],
        "exampleTranslation": "¿Dónde nació Pablo Picasso?",
        "exampleTranslatedEntities": ["Pablo Picasso"]
    },
    "th": {
        "exampleSentence": "Where can you see the Grand Palace in Thailand?",
        "exampleEntities": ["Grand Palace", "Thailand"],
        "exampleTranslation": "พระบรมมหาราชวังตั้งอยู่ที่ไหนในประเทศไทย?",
        "exampleTranslatedEntities": ["พระบรมมหาราชวัง", "ประเทศไทย"]
    },
    "tr": {
        "exampleSentence": "In which city is the Hagia Sophia located?",
        "exampleEntities": ["Hagia Sophia"],
        "exampleTranslation": "Ayasofya hangi şehirde bulunur?",
        "exampleTranslatedEntities": ["Ayasofya"]
    }
}

### 3. Create prompt to translate a sentence to respective language (zero-shot)

In [None]:
# Zero-shot prompt: takes the source sentence and the target language name and
# asks the model to answer with a JSON object holding a single "translation" key.
# The doubled braces ({{ }}) are literal braces in the rendered prompt; single
# braces mark the template variables listed in input_variables.
translation_prompt_zero_shot = PromptTemplate(
    input_variables=["sentence", "language"],
    template="""
You are a professional translator.

Translate the following sentence into {language}, ensuring that all named entities are preserved correctly. If no translation exists for an entity, retain it in its original form. Do not transliterate or guess unknown names.

Return only a JSON object with a single key "translation". No explanations, no extra fields, no formatting markers and no answers to any questions. ONLY return the translation.

Sentence: "{sentence}"

Output format:
{{
  "translation": "<translated sentence here>"
}}
"""
)

### 4. Create prompt to translate a sentence to respective language (one-shot)

In [None]:
# One-shot prompt: same instructions as the zero-shot prompt, preceded by one
# worked example (sentence, entities, expected JSON output) before the sentence
# to translate. The doubled braces ({{ }}) are literal braces in the rendered
# prompt; single braces mark the template variables listed in input_variables.
translation_prompt_one_shot = PromptTemplate(
    input_variables=["sentence", "language", "exampleSentence", "exampleTranslation", "exampleEntities"],
    template="""
You are a professional translator.

Translate the following sentence into {language}, ensuring that all named entities are preserved correctly. If no translation exists for an entity, retain it in its original form. Do not transliterate or guess unknown names.

Return only a JSON object with a single key "translation". No explanations, no extra fields, no formatting markers and no answers to any questions. ONLY return the translation.

Example:
Sentence: "{exampleSentence}"
Entities: {exampleEntities}
Output:
{{
  "translation": "{exampleTranslation}"
}}

Now translate this:
Sentence: "{sentence}"

Output format:
{{
  "translation": "<translated sentence here>"
}}
"""
)

### 5. Get folder and file path for translation

In [4]:
# Maps a language code to the base name (no extension) of the JSONL file it was
# loaded from; filled in as a side effect of load_all_jsonl_files_by_language
# and used later to name the prediction files.
language_filepaths = {}

def load_all_jsonl_files_by_language(folder_path):
    """Load every ``*.jsonl`` file in ``folder_path`` and group its records by language.

    The language code is the part of the file's base name before the first
    underscore (the whole base name when there is no underscore). As a side
    effect, ``language_filepaths[lang_code]`` is set to the file's base name.

    Args:
        folder_path: Directory that contains the ``.jsonl`` files.

    Returns:
        dict mapping language code -> list of decoded JSON records, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    lang_data = {}

    # glob returns entries in filesystem order; sorting keeps the load order
    # (and therefore the processing order of later cells) deterministic.
    for file_path in sorted(glob.glob(f"{folder_path}/*.jsonl")):
        file_stem = os.path.splitext(os.path.basename(file_path))[0]
        # Split the stem rather than the full name so that a file without an
        # underscore ("ar.jsonl") yields "ar" instead of "ar.jsonl".
        lang_code = file_stem.split("_")[0]
        language_filepaths[lang_code] = file_stem

        records = lang_data.setdefault(lang_code, [])

        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                # Blank lines (e.g. a trailing newline) are not records.
                if line.strip():
                    records.append(json.loads(line))

    return lang_data


def get_language_name(short_code):
    """Return the English language name for a short locale code.

    Codes that are not in the table are returned unchanged.
    """
    names_by_code = dict(
        ar='Arabic',
        zh='Chinese (Traditional)',
        fr='French',
        de='German',
        it='Italian',
        ja='Japanese',
        ko='Korean',
        es='Spanish',
        th='Thai',
        tr='Turkish',
        en='English',
    )
    if short_code in names_by_code:
        return names_by_code[short_code]
    return short_code

### 6. Retrieve data from JSON files

In [5]:
# Folder holding the reference JSONL files; the path is relative to the
# notebook's working directory.
jsonl_folder = "data/references/validation"
# Language code -> list of records; this call also fills language_filepaths.
all_lang_data  = load_all_jsonl_files_by_language(jsonl_folder)

### 7. Verify the loaded files

In [6]:
# Sanity check: print how many records were loaded for each language.
for lang_code, records in all_lang_data.items():
    print(f"Loaded {len(records)} records for {get_language_name(lang_code)} ({lang_code})")

Loaded 722 records for Arabic (ar)
Loaded 731 records for German (de)
Loaded 739 records for Spanish (es)
Loaded 724 records for French (fr)
Loaded 730 records for Italian (it)
Loaded 723 records for Japanese (ja)
Loaded 745 records for Korean (ko)
Loaded 710 records for Thai (th)
Loaded 732 records for Turkish (tr)
Loaded 722 records for Chinese (Traditional) (zh)


### 8. Define the LangChain chains from the prompt templates

In [None]:
# Use the "mistral" model through Ollama (the run log below shows requests
# going to a local endpoint at 127.0.0.1:11434).
llm = OllamaLLM(model="mistral")
# Zero-shot chain: render the zero-shot prompt and pipe it into the model.
chain_translate = translation_prompt_zero_shot | llm
# One-shot chain: same model, prompt carries one worked example.
chain2 = translation_prompt_one_shot | llm

### 9. Handle comma-related errors during parsing

In [8]:
def fix_missing_commas(raw_text):
    """Insert a comma where two JSON fields follow each other without one.

    A comma is added after a double quote only when the next token is a quoted
    string followed by ``:`` (i.e. the key of the next field). Matching any two
    adjacent quotes instead would rewrite an empty string value ``""`` into
    ``","`` and corrupt the payload.

    Args:
        raw_text: Model output that is expected to contain a JSON object.

    Returns:
        The text with the missing commas inserted; unchanged if none are missing.
    """
    # Group 1: closing quote of the previous value plus any whitespace.
    # Lookahead: a complete quoted string (escapes allowed) followed by a colon.
    return re.sub(r'("\s*)(?="(?:[^"\\]|\\.)*"\s*:)', r'\1,', raw_text)

### 10. Handle parsing JSON objects

In [9]:
def safe_model_output_parse(raw_output):
    """Best-effort conversion of a model reply into a parsed JSON value.

    Dicts are returned unchanged. Anything that is not a ``str`` is decoded as
    UTF-8 when possible, otherwise converted with ``str()``. The text is then
    stripped, passed through ``fix_text`` and parsed with ``json.loads``; if
    that fails, ``demjson3.decode`` is tried on the same text.

    Args:
        raw_output: A dict, a string, or an object that can be decoded/stringified.

    Returns:
        The parsed value, or ``None`` when the text cannot be repaired or parsed
        (the failure is printed).
    """
    if isinstance(raw_output, dict):
        return raw_output

    if not isinstance(raw_output, str):
        try:
            raw_output = raw_output.decode('utf-8')
        except Exception:
            # `except Exception` rather than a bare `except` so that
            # KeyboardInterrupt / SystemExit still stop a long-running cell.
            raw_output = str(raw_output)

    try:
        # Repair the text once and reuse it for both parsers.
        fixed = fix_text(raw_output.strip())

        try:
            return json.loads(fixed)
        except Exception:
            return demjson3.decode(fixed)
    except Exception as e:
        print(f"Failed to recover batch with demjson3: {e}")

        return None

### 11. Perform translation for zero-shot

In [None]:
def translate_sentence_zero_shot(source, language, max_attempts=5):
    """Translate ``source`` into ``language`` with the zero-shot chain.

    The chain is invoked up to ``max_attempts`` times. Each reply is parsed with
    ``json.loads``; if that fails, the reply is repaired with
    ``fix_missing_commas``, evaluated with ``ast.literal_eval`` and passed to
    ``safe_model_output_parse``. Every failed attempt (invocation error,
    unparseable reply, or a reply without a ``"translation"`` key) counts
    against ``max_attempts``, so the loop always terminates.

    Args:
        source: Sentence to translate.
        language: Target language name inserted into the prompt.
        max_attempts: Maximum number of model calls for this sentence.

    Returns:
        A dict containing at least the key ``"translation"``, or ``None`` when
        no attempt produced a usable reply.
    """
    for _ in range(max_attempts):
        try:
            raw_translated = chain_translate.invoke({
                "sentence": source,
                "language": language
            })
        except Exception as e:
            # Nothing to parse when the call itself failed.
            print(f"Model invocation failed, retrying: {e}")
            continue

        try:
            parsed = json.loads(raw_translated)
        except Exception:
            try:
                cleaned = fix_missing_commas(raw_translated)
                try:
                    # Python literal syntax already accepts single-quoted
                    # strings, so try the reply as-is first; this keeps
                    # apostrophes inside the translation intact.
                    fixed = ast.literal_eval(cleaned)
                except Exception:
                    # Fall back to the blanket quote replacement.
                    fixed = ast.literal_eval(cleaned.replace("'", '"'))
                parsed = safe_model_output_parse(fixed)
            except Exception as e2:
                print(f"Failed to recover model output: {e2}")
                continue

        # Callers index the result with ['translation'], so anything else is a failure.
        if isinstance(parsed, dict) and "translation" in parsed:
            return parsed

        print("Failed to parse model output, retrying...")

    return None

### 12. Perform translation for one-shot

In [16]:
def translate_sentence_one_shot(source, language, record, max_attempts=5):
    """Translate ``source`` into ``language`` with the one-shot chain.

    The worked example for the prompt is taken from
    ``examples[record['target_locale']]``. The chain is invoked up to
    ``max_attempts`` times. Each reply is parsed with ``json.loads``; if that
    fails, the reply is repaired with ``fix_missing_commas``, evaluated with
    ``ast.literal_eval`` and passed to ``safe_model_output_parse``. A reply
    recovered this way is returned immediately. Every failed attempt counts
    against ``max_attempts``, so the loop always terminates.

    Args:
        source: Sentence to translate.
        language: Target language name inserted into the prompt.
        record: Reference record; its ``'target_locale'`` selects the example.
        max_attempts: Maximum number of model calls for this sentence.

    Returns:
        A dict containing at least the key ``"translation"``, or ``None`` when
        there is no example for the locale or no attempt produced a usable reply.
    """
    example = examples.get(record.get('target_locale'))
    if example is None:
        # Without an example the prompt cannot be rendered; retrying cannot help.
        print(f"No one-shot example for locale {record.get('target_locale')!r}, skipping.")
        return None

    for _ in range(max_attempts):
        try:
            raw_translated = chain2.invoke({
                "sentence": source,
                "language": language,
                "exampleSentence": example['exampleSentence'],
                "exampleEntities": example['exampleEntities'],
                "exampleTranslation": example['exampleTranslation']
            })
        except Exception as e:
            # Nothing to parse when the call itself failed.
            print(f"Model invocation failed, retrying: {e}")
            continue

        try:
            parsed = json.loads(raw_translated)
        except Exception:
            try:
                cleaned = fix_missing_commas(raw_translated)
                try:
                    # Python literal syntax already accepts single-quoted
                    # strings, so try the reply as-is first; this keeps
                    # apostrophes inside the translation intact.
                    fixed = ast.literal_eval(cleaned)
                except Exception:
                    # Fall back to the blanket quote replacement.
                    fixed = ast.literal_eval(cleaned.replace("'", '"'))
                parsed = safe_model_output_parse(fixed)
            except Exception as e2:
                print(f"Failed to recover model output: {e2}")
                continue

        # Callers index the result with ['translation'], so anything else is a failure.
        if isinstance(parsed, dict) and "translation" in parsed:
            return parsed

        print("Failed to parse model output, retrying...")

    return None

### 13. Begin zero-shot using LangChain for each language

In [None]:
# Translate every record of every language with the zero-shot chain and write
# one JSONL prediction file per language.
for lang_code, records in all_lang_data.items():
    language = get_language_name(lang_code)

    output_file = f"data/predictions/mistral7b/validation/zero_shot/{language_filepaths[lang_code]}.jsonl"
    # No earlier cell creates this directory, so open() would fail on a fresh checkout.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    results = []

    # Open the file once per language and append one line per record. Flushing
    # after each write keeps partial progress on disk without rewriting the
    # whole file for every record.
    with open(output_file, 'w', encoding='utf-8') as f:
        for record in records:

            source = record['source']

            # Translate sentence
            raw_translated = translate_sentence_zero_shot(source, language)

            # Skip records with no usable translation (None or a reply without the key).
            if not isinstance(raw_translated, dict) or 'translation' not in raw_translated:
                continue

            result = {
                "id": record['id'],
                "text": source,
                "source_language": record['source_locale'],
                "target_language": record['target_locale'],
                "prediction": raw_translated['translation'],
            }
            results.append(result)

            # Record the result to the file
            f.write(json.dumps(result, ensure_ascii=False) + '\n')
            f.flush()

### 14. Begin one-shot using LangChain for each language

In [None]:
# Translate every record of every language with the one-shot chain and write
# one JSONL prediction file per language. The cell is resumable: records whose
# id is already present in the language's output file are not translated again,
# so an interrupted run can be continued by re-running the cell. Delete the
# output file to force a language to be regenerated.
for lang_code, records in all_lang_data.items():
    language = get_language_name(lang_code)

    output_file = f"data/predictions/mistral7b/validation/one_shot/{language_filepaths[lang_code]}.jsonl"
    # No earlier cell creates this directory, so open() would fail on a fresh checkout.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    # Collect the ids that an earlier (possibly interrupted) run already wrote.
    done_ids = set()
    if os.path.exists(output_file):
        with open(output_file, 'r', encoding='utf-8') as f:
            for line in f:
                if line.strip():
                    done_ids.add(json.loads(line)['id'])

    results = []

    # Append mode keeps the existing predictions; flushing after each write
    # keeps partial progress on disk without rewriting the whole file.
    with open(output_file, 'a', encoding='utf-8') as f:
        for record in records:
            if record['id'] in done_ids:
                continue

            source = record['source']

            # Translate sentence
            raw_translated = translate_sentence_one_shot(source, language, record)

            # Skip records with no usable translation (None or a reply without the key).
            if not isinstance(raw_translated, dict) or 'translation' not in raw_translated:
                continue

            result = {
                "id": record['id'],
                "text": source,
                "source_language": record['source_locale'],
                "target_language": record['target_locale'],
                "prediction": raw_translated['translation'],
            }
            results.append(result)

            # Record the result to the file
            f.write(json.dumps(result, ensure_ascii=False) + '\n')
            f.flush()

HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: unterminated string literal (detected at line 5) (<unknown>, line 5)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: invalid syntax (<unknown>, line 5)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Failed to recover batch with demjson3: invalid syntax (<unknown>, line 5)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: invalid syntax (<unknown>, line 2)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: unexpected indent (<unknown>, line 5)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: invalid syntax (<unknown>, line 2)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: unterminated string literal (detected at line 2) (<unknown>, line 2)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: invalid syntax (<unknown>, line 2)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: unterminated string literal (detected at line 2) (<unknown>, line 2)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: invalid syntax (<unknown>, line 5)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: unterminated string literal (detected at line 2) (<unknown>, line 2)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: invalid syntax (<unknown>, line 5)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

Failed to recover batch with demjson3: invalid syntax (<unknown>, line 2)


HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
HTTP Request: POST http://127.0.0.1:11434/api/gener

### 15. Define folder and file structure to save M-ETA and COMET scores

In [28]:
# Download the COMET checkpoint and load it; the loaded model is passed to
# calculate_comet_scores by calculate_scores.
comet_model_path = download_model("Unbabel/wmt22-comet-da")
comet_model = load_from_checkpoint(comet_model_path)
# Predictions (and, beneath them, the scores) for this model live under
# data/predictions/<model_name>/validation.
model_name = "mistral7b"
output_prediction_dir = os.path.join("data/predictions", model_name, "validation")
os.makedirs(output_prediction_dir, exist_ok=True)

# Reference files to score against; same folder as jsonl_folder used for loading.
input_data_folder = "data/references/validation"
jsonl_files = glob.glob(f"{input_data_folder}/*.jsonl")

Fetching 5 files: 100%|██████████| 5/5 [00:00<?, ?it/s]
Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.1.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint C:\Users\Z004H06T\.cache\huggingface\hub\models--Unbabel--wmt22-comet-da\snapshots\2760a223ac957f30acfb18c8aa649b01cf1d75f2\checkpoints\model.ckpt`
Encoder model frozen.
c:\Users\Z004H06T\AppData\Local\Programs\Python\Python310\lib\site-packages\pytorch_lightning\core\saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


### 16. COMET and M-ETA scores calculation

In [29]:
def calculate_scores(template_id):
    """Score the predictions of one prompting setup against every reference file.

    For each path in ``jsonl_files`` the prediction file with the same name is
    looked up under ``<output_prediction_dir>/<template_id>/``. The COMET and
    M-ETA results are written as JSON to
    ``<output_prediction_dir>/<template_id>/scores/<name>.json``.

    Args:
        template_id: Sub-folder name of the prompting setup (the notebook calls
            this with "zero_shot" and "one_shot").
    """
    template_dir = os.path.join(output_prediction_dir, template_id)
    scores_dir = os.path.join(template_dir, "scores")

    if not os.path.exists(scores_dir):
        os.makedirs(scores_dir, exist_ok=True)

    for references_path in jsonl_files:
        filename = os.path.basename(references_path)
        predictions_path = os.path.join(template_dir, filename)

        comet_score = calculate_comet_scores(comet_model, references_path, predictions_path)
        meta_result = calculate_meta_score(references_path, predictions_path)
        correct_instances, total_instances, meta_score = meta_result

        # Key order is kept so the written JSON has the same layout as before.
        evaluation_results = {}
        evaluation_results["correct_instances"] = correct_instances
        evaluation_results["total_instances"] = total_instances
        evaluation_results["comet_score"] = comet_score
        evaluation_results["meta_score"] = meta_score

        evaluation_output_path = os.path.join(scores_dir, filename.replace(".jsonl", ".json"))
        with open(evaluation_output_path, 'w', encoding='utf-8') as json_file:
            json.dump(evaluation_results, json_file, ensure_ascii=False, indent=4)

### 17. Calculate COMET and M-ETA scores for quality evaluation (zero-shot)

In [None]:
# Score the zero-shot predictions; results are written under
# <output_prediction_dir>/zero_shot/scores.
calculate_scores("zero_shot")

### 18. Calculate COMET and M-ETA scores for quality evaluation (one-shot)

In [30]:
# Score the one-shot predictions; results are written under
# <output_prediction_dir>/one_shot/scores.
calculate_scores("one_shot")

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.


All references have a corresponding prediction
Created 1177 instances


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100%|██████████| 37/37 [00:21<00:00,  1.75it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 63.17
Loaded 722 instances.
Loaded 722 predictions.
All references have a corresponding prediction
Created 1260 instances


Predicting DataLoader 0: 100%|██████████| 40/40 [00:21<00:00,  1.82it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 84.58
Loaded 731 instances.
Loaded 731 predictions.
All references have a corresponding prediction
Created 1229 instances


Predicting DataLoader 0: 100%|██████████| 39/39 [00:21<00:00,  1.80it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 88.81
Loaded 739 instances.
Loaded 739 predictions.
All references have a corresponding prediction
Created 1316 instances


Predicting DataLoader 0: 100%|██████████| 42/42 [00:24<00:00,  1.69it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 85.05
Loaded 724 instances.
Loaded 724 predictions.
All references have a corresponding prediction
Created 1268 instances


Predicting DataLoader 0: 100%|██████████| 40/40 [00:21<00:00,  1.86it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 87.16
Loaded 730 instances.
Loaded 730 predictions.
All references have a corresponding prediction
Created 1409 instances


Predicting DataLoader 0: 100%|██████████| 45/45 [01:14<00:00,  1.66s/it]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 80.73
Loaded 723 instances.
Loaded 723 predictions.
All references have a corresponding prediction
Created 1660 instances


Predicting DataLoader 0: 100%|██████████| 52/52 [00:28<00:00,  1.82it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 79.75
Loaded 745 instances.
Loaded 745 predictions.
All references have a corresponding prediction
Created 1654 instances


Predicting DataLoader 0: 100%|██████████| 52/52 [00:35<00:00,  1.47it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 48.88
Loaded 710 instances.
Loaded 710 predictions.
All references have a corresponding prediction
Created 1260 instances


Predicting DataLoader 0: 100%|██████████| 40/40 [00:20<00:00,  1.92it/s]
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Average COMET score: 73.09
Loaded 732 instances.
Loaded 732 predictions.
All references have a corresponding prediction
Created 1544 instances


Predicting DataLoader 0: 100%|██████████| 49/49 [00:25<00:00,  1.94it/s]


Average COMET score: 77.97
Loaded 722 instances.
Loaded 722 predictions.
