In [None]:
import os
import json
import jieba
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
from datasets import load_dataset
from tqdm import tqdm
import re
from nltk.translate.bleu_score import sentence_bleu
import OpenHowNet
# Make sure Google Drive is mounted; the model and output paths configured
# later in this file live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Load OpenHowNet and initialise its similarity calculation. are_synonyms()
# queries this module-level dictionary for word-to-word similarity.
hownet_dict = OpenHowNet.HowNetDict()
hownet_dict.initialize_similarity_calculation()

# Load the HIT Cilin thesaurus and build the synonym sets
def load_cilin(file_path):
    """Read synonym groups from a Cilin-style thesaurus file.

    Only lines containing "=" are used. For such a line, the text between the
    first and the second "=" (or up to the end of the line when there is only
    one) is split on whitespace, and the resulting words form one group.

    Args:
        file_path: Path to the UTF-8 encoded thesaurus file.

    Returns:
        A list of sets of words, one set per accepted line, in file order.
    """
    with open(file_path, encoding='utf-8') as cilin_file:
        return [
            set(entry.strip().split('=')[1].split())
            for entry in cilin_file
            if "=" in entry
        ]

# Check whether two words are synonyms, with OpenHowNet semantic similarity as a fallback
def are_synonyms(word1, word2, synonym_groups, threshold=0.7):
    """Decide whether two words should be treated as synonyms.

    The words are synonyms when some group in ``synonym_groups`` contains both
    of them. Otherwise the module-level ``hownet_dict`` is asked for their
    similarity, and the words are synonyms when that value is not ``None`` and
    is at least ``threshold``.

    Args:
        word1: First word.
        word2: Second word.
        synonym_groups: Iterable of word collections, e.g. from load_cilin().
        threshold: Minimum OpenHowNet similarity that counts as a match.

    Returns:
        True if the words are considered synonyms, otherwise False.
    """
    # Thesaurus lookup first; the similarity model is consulted only on a miss.
    if any(word1 in group and word2 in group for group in synonym_groups):
        return True

    score = hownet_dict.calculate_word_similarity(word1, word2)
    if score is None:
        return False
    return bool(score >= threshold)

# Word segmentation helper
def tokenize(text):
    """Segment ``text`` with jieba and return the tokens as a list."""
    return jieba.lcut(text)

# Count the aligned tokens and collect them
def calculate_alignment(trans_tokens, ref_tokens, synonym_groups):
    """Greedily align translation tokens to reference tokens.

    Translation tokens are processed in order. Each one is aligned to an
    identical, still-unused reference token when one exists; otherwise to the
    first still-unused reference token (in reference order) that
    are_synonyms() accepts. Every reference occurrence can be used once.

    The reference is tracked as a multiset rather than a plain set, so a word
    that occurs several times in the reference (e.g. "的") can be aligned
    several times. Collapsing repeats into a set would cap the aligned count
    at the number of distinct reference words while recall is still divided
    by the full reference length.

    Args:
        trans_tokens: Tokens of the candidate translation.
        ref_tokens: Tokens of the reference translation.
        synonym_groups: Synonym groups forwarded to are_synonyms().

    Returns:
        A tuple ``(count, aligned)`` where ``aligned`` lists the aligned
        translation tokens in translation order and ``count == len(aligned)``.
    """
    from collections import Counter

    # Counter keeps first-seen order of its keys, so synonym candidates are
    # tried in reference order and the result is reproducible across runs.
    remaining = Counter(ref_tokens)
    aligned = []

    def consume(ref_token):
        # Use up one occurrence; drop exhausted words so they are no longer
        # offered as exact or synonym candidates.
        remaining[ref_token] -= 1
        if not remaining[ref_token]:
            del remaining[ref_token]

    for token in trans_tokens:
        if token in remaining:
            consume(token)
            aligned.append(token)
            continue

        # Finish the search before mutating the multiset being iterated.
        match = next(
            (ref_token for ref_token in remaining
             if are_synonyms(token, ref_token, synonym_groups)),
            None,
        )
        if match is not None:
            consume(match)
            aligned.append(token)

    return len(aligned), aligned

# Compute the fragmentation penalty
def calculate_fragmentation(trans_tokens, aligned_tokens):
    """Compute the fragmentation penalty of an alignment.

    The aligned positions in ``trans_tokens`` are grouped into runs of
    consecutive indices ("fragments"). The penalty is
    ``0.5 * (fragments / aligned_positions) ** 3``.

    ``aligned_tokens`` is treated as a multiset: each entry marks exactly one
    occurrence in ``trans_tokens``, the earliest one not yet marked. A plain
    membership test would mark every occurrence of a repeated word as aligned
    even when only some of them were, inflating the number of aligned
    positions and distorting the fragment count.

    Args:
        trans_tokens: Tokens of the candidate translation.
        aligned_tokens: The aligned translation tokens, as returned by
            calculate_alignment().

    Returns:
        The penalty as a float, or 1.0 when no position is aligned.
    """
    from collections import Counter

    unplaced = Counter(aligned_tokens)
    aligned_indices = []
    for position, token in enumerate(trans_tokens):
        # Counter returns 0 for unknown tokens without inserting them.
        if unplaced[token] > 0:
            unplaced[token] -= 1
            aligned_indices.append(position)

    if not aligned_indices:
        return 1.0

    # A new fragment starts wherever two aligned positions are not adjacent.
    fragments = 1 + sum(
        1
        for previous, current in zip(aligned_indices, aligned_indices[1:])
        if current != previous + 1
    )
    return 0.5 * (fragments / len(aligned_indices)) ** 3

# Compute METEOR
def calculate_meteor(trans_tokens, ref_tokens, synonym_groups):
    """Compute a METEOR-style score for one candidate against one reference.

    Precision and recall are the aligned-token count divided by the candidate
    and reference lengths (0 for an empty side). They are combined as
    ``P * R / (alpha * P + (1 - alpha) * R)`` with ``alpha = 0.9``, and the
    result is scaled by ``1 - fragmentation penalty``.

    Args:
        trans_tokens: Tokens of the candidate translation.
        ref_tokens: Tokens of the reference translation.
        synonym_groups: Synonym groups forwarded to calculate_alignment().

    Returns:
        A dict with the aligned tokens, their count, precision, recall,
        F-score, fragmentation penalty and the final METEOR value.
    """
    alpha = 0.9
    match_count, matched = calculate_alignment(trans_tokens, ref_tokens, synonym_groups)

    if trans_tokens:
        precision = match_count / len(trans_tokens)
    else:
        precision = 0

    if ref_tokens:
        recall = match_count / len(ref_tokens)
    else:
        recall = 0

    if precision + recall > 0:
        weighted_mix = alpha * precision + (1 - alpha) * recall
        f_score = (precision * recall) / weighted_mix
    else:
        # Nothing aligned: skip the division, whose denominator would be zero.
        f_score = 0

    penalty = calculate_fragmentation(trans_tokens, matched)

    report = {}
    report["Aligned Tokens"] = matched
    report["Aligned Count"] = match_count
    report["Precision"] = precision
    report["Recall"] = recall
    report["F-Score"] = f_score
    report["Fragmentation Penalty"] = penalty
    report["METEOR"] = f_score * (1 - penalty)
    return report

# Path configuration
cilin_path = "/content/cilin.txt"  # path to the synonym thesaurus
# NOTE(review): the config/tokenizer folder and the weights checkpoint are two
# different directories — confirm they belong to the same fine-tuning run.
config_folder = "/content/drive/MyDrive/mbart_finetuned_dynamic_final_2"
weights_folder = "/content/drive/MyDrive/mbart_finetuned_dynamic_updated_2/checkpoint-156105"
output_path = "/content/drive/MyDrive/NER_Output/translated_results_iwslt_1127.json"

# Load the Cilin synonym groups
synonym_groups = load_cilin(cilin_path)

# Load the IWSLT 2017 en-zh test split
dataset = load_dataset("iwslt2017", "iwslt2017-en-zh", split="test")

# Load the model and tokenizer: weights come from weights_folder, while the
# model config and the tokenizer files come from config_folder.
# NOTE(review): no source/target language code is set on the tokenizer here —
# presumably the saved tokenizer/config already carries them; confirm.
model = MBartForConditionalGeneration.from_pretrained(weights_folder, config=config_folder).to("cuda")
tokenizer = MBart50Tokenizer.from_pretrained(config_folder)

# Evaluation routine
def evaluate_translations(model, tokenizer, dataset, output_path, synonym_groups, num_translations=5):
    """Translate every example, keep the best candidate by METEOR, and save results.

    For each example, ``num_translations`` beam-search candidates are generated
    from the English source. Each candidate is scored with calculate_meteor()
    against the tokenized Chinese reference, and the highest-scoring one is
    kept and also scored with NLTK ``sentence_bleu`` (default weights, no
    smoothing). Because the candidate is selected using the reference, the
    reported scores describe the reference-selected candidate, not the
    model's top-1 output.

    Per-example records are written to ``output_path`` as JSON and the mean
    METEOR and BLEU over all examples are printed.

    Args:
        model: Generation model; inputs are moved to ``model.device``.
        tokenizer: Tokenizer used to encode the source and decode candidates.
        dataset: Iterable of examples exposing ``example["translation"]["en"]``
            and ``example["translation"]["zh"]``.
        output_path: Destination JSON file; its parent directory is created
            if it does not exist.
        synonym_groups: Synonym groups forwarded to calculate_meteor().
        num_translations: Number of beams and of returned candidates.

    Returns:
        None.
    """
    print("Starting evaluation...")

    # Create the output directory before the expensive generation loop, so a
    # missing folder cannot cause the results to be lost at the final write.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Follow the model's device instead of assuming "cuda".
    device = model.device

    translated_results = []
    total_meteor = 0
    total_bleu = 0
    num_sentences = 0

    for example in tqdm(dataset):
        input_text = example["translation"]["en"]
        reference_text = tokenize(example["translation"]["zh"])

        # Generate several candidate translations with beam search.
        # NOTE(review): no forced_bos_token_id / target language is passed to
        # generate() — presumably the fine-tuned config already pins it; confirm.
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=256).to(device)
        outputs = model.generate(
            inputs["input_ids"],
            max_length=256,
            num_return_sequences=num_translations,
            num_beams=num_translations,
            early_stopping=True
        )
        translations = [tokenize(tokenizer.decode(output, skip_special_tokens=True)) for output in outputs]

        # METEOR score of every candidate against the reference.
        meteor_scores = [
            calculate_meteor(translation, reference_text, synonym_groups)["METEOR"]
            for translation in translations
        ]

        # Keep the best candidate (the first one on ties).
        best_translation_idx = max(range(len(meteor_scores)), key=meteor_scores.__getitem__)
        best_translation = translations[best_translation_idx]
        best_meteor_score = meteor_scores[best_translation_idx]

        # BLEU of the selected candidate.
        bleu_score = sentence_bleu([reference_text], best_translation)

        # Accumulate for the corpus-level averages.
        total_meteor += best_meteor_score
        total_bleu += bleu_score
        num_sentences += 1

        translated_results.append({
            "Original Text": input_text,
            "Reference Text": " ".join(reference_text),
            "All Translations": [" ".join(translation) for translation in translations],
            "Best Translation": " ".join(best_translation),
            "Best METEOR Score": best_meteor_score,
            "BLEU Score": bleu_score
        })

    # Save the per-example results.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(translated_results, f, ensure_ascii=False, indent=4)

    # Report the averages; an empty dataset has none, so avoid dividing by zero.
    if num_sentences:
        print(f"Overall METEOR: {total_meteor / num_sentences}")
        print(f"Overall BLEU: {total_bleu / num_sentences}")
    else:
        print("No sentences were evaluated; average scores are undefined.")
    print(f"Evaluation results saved to {output_path}")

# Run the evaluation
evaluate_translations(model, tokenizer, dataset, output_path, synonym_groups, num_translations=5)
