In [None]:
import nltk

# Fetch the NLTK data packages this notebook asks for. WordNet is handed to the
# METEOR scorer further down; omw-1.4 and punkt are requested alongside it
# (presumably for multilingual WordNet lookups and NLTK tokenisation -- confirm
# whether punkt is still needed, the visible cells tokenise with str.split).
for corpus_name in ("wordnet", "omw-1.4"):
    nltk.download(corpus_name)

# Left as the cell's final expression so the notebook still shows its return value.
nltk.download("punkt")


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\tensa\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\tensa\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\tensa\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
import os

import pandas as pd
from nltk.corpus import wordnet
from nltk.stem import PorterStemmer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import single_meteor_score

def evaluate_translation_with_bleu_and_meteor(input_file, output_dir):
    """Score every translation in a CSV with sentence-level BLEU and METEOR.

    Each row's ``translation`` (hypothesis) is compared with its ``target_id``
    (reference) after plain whitespace tokenisation. The per-row scores are
    stored in two new columns, ``bleu_score`` and ``meteor_score``, the
    averages over all rows are printed, and the resulting frame is written to
    ``<output_dir>/<input file stem>_bleu_meteor.csv``.

    Rows where either text is missing receive 0.0 for both metrics and still
    count towards the averages.

    Args:
        input_file: Path of a CSV file containing the columns ``target_id``
            and ``translation``.
        output_dir: Directory for the scored CSV; created if it does not exist.

    Returns:
        The path of the CSV file that was written.

    Raises:
        KeyError: If ``target_id`` or ``translation`` is not a column of the CSV.
    """
    df = pd.read_csv(input_file)

    # Smoothing keeps sentence-level BLEU from collapsing to zero when one of
    # the higher-order n-grams has no match at all.
    smooth_fn = SmoothingFunction().method1
    stemmer = PorterStemmer()

    bleu_scores = []
    meteor_scores = []

    for ref, hyp in zip(df["target_id"], df["translation"]):
        if pd.isna(ref) or pd.isna(hyp):
            bleu_scores.append(0.0)
            meteor_scores.append(0.0)
            continue

        # str() protects against cells that pandas parsed as numbers
        # (e.g. a translation consisting only of "2025"), which have no .split().
        ref_tokens = str(ref).split()
        hyp_tokens = str(hyp).split()

        bleu = sentence_bleu([ref_tokens], hyp_tokens, smoothing_function=smooth_fn)

        meteor = single_meteor_score(
            ref_tokens,
            hyp_tokens,
            stemmer=stemmer,
            wordnet=wordnet
        )

        bleu_scores.append(bleu)
        meteor_scores.append(meteor)

    df["bleu_score"] = bleu_scores
    # The METEOR column was computed but never stored, so the "*_bleu_meteor.csv"
    # output silently lacked half of its metrics.
    df["meteor_score"] = meteor_scores

    # An input file with a header but no rows must not crash the averaging.
    row_count = len(bleu_scores)
    avg_bleu = sum(bleu_scores) / row_count if row_count else 0.0
    avg_meteor = sum(meteor_scores) / row_count if row_count else 0.0

    print(f"Rata-rata BLEU: {avg_bleu:.4f}")
    print(f"Rata-rata METEOR: {avg_meteor:.4f}")

    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    output_file = os.path.join(output_dir, f"{base_name}_bleu_meteor.csv")
    df.to_csv(output_file, index=False)

    return output_file


In [None]:
# Translation outputs to score, one CSV per model.
files = [
    "19-08-2025/translate_EN_to_ID/data/translated_with_gemma3_1b_EN_ID_0_1000.csv",
    "19-08-2025/translate_EN_to_ID/data/translated_with_llama3.2_3b_EN_ID_0_1000.csv",
    "19-08-2025/translate_EN_to_ID/data/translated_with_qwen3_0.6b_EN_ID_0_1000.csv",
]

# Every scored CSV lands in the same directory.
output_dir = "19-08-2025/translate_EN_to_ID/bleu_meteor"

for file in files:
    os.makedirs(output_dir, exist_ok=True)
    print(f"Evaluating {file}...")
    output_file = evaluate_translation_with_bleu_and_meteor(file, output_dir)
    print(f"Evaluation results saved to {output_file}")

Evaluating 19-08-2025/translate_EN_to_ID/data/translated_with_gemma3_1b_EN_ID_0_1000.csv...
Rata-rata BLEU: 0.1240
Rata-rata METEOR: 0.3720
Evaluation results saved to 19-08-2025/translate_EN_to_ID/bleu_meteor\translated_with_gemma3_1b_EN_ID_0_1000_bleu_meteor.csv
Evaluating 19-08-2025/translate_EN_to_ID/data/translated_with_llama3.2_3b_EN_ID_0_1000.csv...
Rata-rata BLEU: 0.1631
Rata-rata METEOR: 0.4591
Evaluation results saved to 19-08-2025/translate_EN_to_ID/bleu_meteor\translated_with_llama3.2_3b_EN_ID_0_1000_bleu_meteor.csv
Evaluating 19-08-2025/translate_EN_to_ID/data/translated_with_qwen3_0.6b_EN_ID_0_1000.csv...
Rata-rata BLEU: 0.0628
Rata-rata METEOR: 0.2602
Evaluation results saved to 19-08-2025/translate_EN_to_ID/bleu_meteor\translated_with_qwen3_0.6b_EN_ID_0_1000_bleu_meteor.csv
