# Evaluate and compare metrics 

In [1]:
import os
import json
import pandas as pd
import numpy as np
from metrics.evaluation_transition import evaluate_translation


In [2]:
# 평가를 위해 reference text 가져오기 
def load_text_file(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    return [line.strip() for line in lines]

In [3]:
# Configurations
translation_config = 'ko2en'
target_lang = "English"  # 목표 언어

data_dir = f"../data/flores/"
result_dir = f"../result/translate/{translation_config}/"
leaderboard_file = f"../result/translate/{translation_config}/leaderboard.csv"

# 기존 리더보드 파일 로드 또는 새로 생성
if os.path.exists(leaderboard_file):
    leaderboard = pd.read_csv(leaderboard_file)
else:
    leaderboard = pd.DataFrame(columns=[
        "device-type", "device-name", "llm", "quantization", "calibration",
        "BLEU", "METEOR", "BERTScore", "tps"
    ])
    
# Load reference data
data_eng = load_text_file(f"{data_dir}/devtest.eng_Latn")
data_kor = load_text_file(f"{data_dir}/devtest.kor_Hang")



In [4]:
active_metrics = ['BLEU', 'METEOR', 'BERTScore', 'tps']

# Process each JSON file
for filename in os.listdir(result_dir):
    if not filename.endswith(".json"):
        continue

    # Parse metadata from filename
    name_head = filename.replace(".json", "")
    metadata = {
        "device-type": name_head.split("-")[0],
        "device-name": name_head.split("_")[0].split("-")[1],
        "llm": name_head.split("_")[1].split('-')[0],
        "quantization": name_head.split("_calib")[0].split('-')[-1],
        "calibration": name_head.split("_calib-")[1],
    }

    # Skip if already in leaderboard
    if ((leaderboard["device-type"] == metadata["device-type"]) &
        (leaderboard["device-name"] == metadata["device-name"]) &
        (leaderboard["llm"] == metadata["llm"]) &
        (leaderboard["quantization"] == metadata["quantization"]) &
        (leaderboard["calibration"] == metadata["calibration"])).any():
        print(f"Skipping {filename}, already in leaderboard.")
        continue
    
    # Load translation results
    with open(os.path.join(result_dir, filename), "r", encoding="utf-8") as file:
        json_data = json.load(file)
        translations = json_data.get("translations", [])

    # Evaluate translations
    num_metrics = 4  # BLEU, METEOR, BERTScore, TPS
    metrics = np.full((num_metrics, len(translations)), np.nan)

    for i, result in enumerate(translations):
        translation = result.get("translation", "")
        elapsed_time = result.get("elapsed_time", 1e-6)  # Default time if not provided
        ref_text = data_eng[i] if i < len(data_eng) else ""

        # Evaluate translation
        metric_result = evaluate_translation(
            translation, ref_text, target_lang, elapsed_time, active_metrics
        )
        for j, metric_name in enumerate(active_metrics):
            metrics[j, i] = metric_result.get(metric_name, np.nan)

    # Calculate averages
    avg_metrics = {metric: np.nanmean(metrics[j, :]) for j, metric in enumerate(active_metrics)}

    # Add to leaderboard
    new_entry = pd.DataFrame([{
        **metadata,
        **avg_metrics
    }])  # Create a DataFrame for the new entry

    # Concatenate the new entry to the leaderboard
    leaderboard = pd.concat([leaderboard, new_entry], ignore_index=True)
    print(f"Processed and added {filename} to leaderboard.")

# 리더보드 CSV 파일로 저장
leaderboard.to_csv(leaderboard_file, index=False)
print(f"Leaderboard updated and saved to {leaderboard_file}")

  leaderboard = pd.concat([leaderboard, new_entry], ignore_index=True)


Processed and added GPU-A5000_qwen2.5:72b-Q4_K_M_calib-base.json to leaderboard.
Processed and added NPU-RNGD_llama3.1-8B-Instruct-W8A8_calib-base.json to leaderboard.
Processed and added GPU-A5000_llama3.1:70b-Q4_K_M_calib-base.json to leaderboard.
Processed and added GPU-A5000_llama3.3:70b-Q4_K_M_calib-base.json to leaderboard.
Processed and added GPU-A5000_llama3.1-Q4_K_M_calib-base.json to leaderboard.
Leaderboard updated and saved to ../result/translate/ko2en/leaderboard.csv
