In [2]:
!python -m spacy download en_core_web_trf

Collecting en-core-web-trf==3.7.3
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl (457.4 MB)
     ---------------------------------------- 0.0/457.4 MB ? eta -:--:--
     -------------------------------------- 0.0/457.4 MB 640.0 kB/s eta 0:11:55
     ---------------------------------------- 0.3/457.4 MB 2.6 MB/s eta 0:02:55
     ---------------------------------------- 1.3/457.4 MB 9.3 MB/s eta 0:00:50
     --------------------------------------- 4.1/457.4 MB 21.9 MB/s eta 0:00:21
      -------------------------------------- 6.7/457.4 MB 28.5 MB/s eta 0:00:16
      -------------------------------------- 8.0/457.4 MB 28.3 MB/s eta 0:00:16
      ------------------------------------- 11.1/457.4 MB 50.4 MB/s eta 0:00:09
     - ------------------------------------ 14.0/457.4 MB 54.7 MB/s eta 0:00:09
     - ------------------------------------ 17.3/457.4 MB 59.5 MB/s eta 0:00:08
     - --------------------

DEPRECATION: omegaconf 2.0.6 has a non-standard dependency specifier PyYAML>=5.1.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of omegaconf or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063

[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# mbart-large

In [5]:
import spacy
from datasets import load_dataset
from transformers import MBartForConditionalGeneration, MBart50Tokenizer, Seq2SeqTrainer, Seq2SeqTrainingArguments, AdamW, EarlyStoppingCallback, TrainerCallback
import torch
import re
import warnings
from tqdm import tqdm
import evaluate
from bert_score import score as bert_score

warnings.filterwarnings("ignore")

# 設定設備
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 載入 SpaCy NER 模型
nlp = spacy.load("en_core_web_trf")

# 載入 IWSLT 2017 英中翻譯資料集
dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='train[:200]', trust_remote_code=True)

# 載入 mBART 模型和 Tokenizer
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = MBart50Tokenizer.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name).to(device)

# 設定源語言和目標語言
tokenizer.src_lang = "en_XX"
tokenizer.tgt_lang = "zh_CN"

# 實體識別和標記的函數
def mark_entities(text):
    doc = nlp(text)
    modified_text = text
    entities = []
    for ent in doc.ents:
        entity_marker = f"<<{ent.label_}:{ent.text}>>"
        modified_text = modified_text.replace(ent.text, entity_marker)
        entities.append((ent.text, ent.label_))
    return modified_text, entities

# 預處理函數
def preprocess_function(examples):
    inputs, targets, entities_list = [], [], []
    for ex in examples["translation"]:
        marked_text, entities = mark_entities(ex["en"])
        inputs.append(marked_text)
        targets.append(ex["zh"])
        entities_list.append(entities)
    
    model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=256, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    model_inputs["entities"] = entities_list
    return model_inputs

# 對數據集進行 Tokenize 和實體標記
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# 訓練參數設置
training_args = Seq2SeqTrainingArguments(
    output_dir="./mbart_finetuned",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=8,
    predict_with_generate=True,
    logging_dir='./logs',
    load_best_model_at_end=True,
)

# 自定義 callback 以顯示每個 epoch 的損失
class LogCallback(TrainerCallback):
    def on_epoch_end(self, args, state, control, **kwargs):
        # 確保 log_history 非空，避免 IndexError
        if state.log_history:
            last_log = state.log_history[-1]
            training_loss = last_log.get("loss", "N/A")
            eval_loss = last_log.get("eval_loss", "N/A")
            print(f"Epoch {state.epoch}: Training Loss {training_loss}, Validation Loss {eval_loss}")
        else:
            print(f"Epoch {state.epoch}: No log history available for this epoch.")

# 將數據集劃分為訓練集和驗證集
train_test_split = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]

# 自定義 Layer-wise Learning Rate Decay
layer_decay = 0.8
optimizer_grouped_parameters = []
num_layers = len(model.model.encoder.layers)
for i, layer in enumerate(model.model.encoder.layers):
    lr = training_args.learning_rate * (layer_decay ** (num_layers - i - 1))
    optimizer_grouped_parameters.append({"params": layer.parameters(), "lr": lr})
optimizer_grouped_parameters.append({"params": model.model.shared.parameters(), "lr": training_args.learning_rate})

# 初始化 optimizer
optimizer = AdamW(optimizer_grouped_parameters, lr=training_args.learning_rate)

# 自定義 Trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    optimizers=(optimizer, None),
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3), LogCallback()],
)

# 開始訓練
trainer.train()

# 後處理實體標記
def postprocess_translation(translated_text, entities):
    for ent_text, ent_label in entities:
        entity_marker_pattern = re.escape(f"<<{ent_label}:") + r"(.*?)>>"
        translated_text = re.sub(entity_marker_pattern, ent_text, translated_text, count=1)
    return translated_text

# 使用訓練好的模型進行翻譯並還原實體
def entity_aware_translate(input_text):
    marked_text, entities = mark_entities(input_text)
    inputs = tokenizer(marked_text, return_tensors="pt").to(device)
    translated_tokens = model.generate(
        inputs["input_ids"],
        max_length=256,
        forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"]
    )
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    return postprocess_translation(translated_text, entities)

# 驗證指標設置
meteor_metric = evaluate.load("meteor")
bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")

# 評估函數
def evaluate_model(val_dataset):
    val_predictions, val_references = [], []
    for example in tqdm(val_dataset, desc="Processing Validation Set"):
        input_text = example["translation"]["en"]
        reference_text = example["translation"]["zh"]

        final_translation = entity_aware_translate(input_text)
        val_predictions.append(final_translation)
        val_references.append([reference_text])

        print("Original English Text:", input_text)
        print("Reference Text:", reference_text)
        print("Translated Text:", final_translation)
        print("=" * 50)

    # 計算各項評分
    val_meteor_score = meteor_metric.compute(predictions=val_predictions, references=val_references)
    val_bleu_score = bleu_metric.compute(predictions=val_predictions, references=val_references)
    val_rouge_score = rouge_metric.compute(predictions=val_predictions, references=val_references)

    print("Validation Set METEOR score:", val_meteor_score)
    print("Validation Set BLEU score:", val_bleu_score)
    print("Validation Set ROUGE score:", val_rouge_score)

    # BERTScore 計算
    P, R, F1 = bert_score(val_predictions, [ref[0] for ref in val_references], lang="zh", verbose=True)
    print(f"BERTScore - Precision: {P.mean().item()}, Recall: {R.mean().item()}, F1: {F1.mean().item()}")

# 執行評估
val_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='validation[:10]', trust_remote_code=True)
evaluate_model(val_dataset)


Using device: cuda


Epoch,Training Loss,Validation Loss
1,No log,10.319304
2,No log,9.235552
3,No log,8.284738
4,No log,7.494404
5,No log,6.86341
6,No log,6.39684
7,No log,6.110349
8,No log,6.014125


Epoch 1.0: No log history available for this epoch.
Epoch 2.0: Training Loss N/A, Validation Loss 10.319303512573242
Epoch 3.0: Training Loss N/A, Validation Loss 9.235551834106445
Epoch 4.0: Training Loss N/A, Validation Loss 8.284737586975098
Epoch 5.0: Training Loss N/A, Validation Loss 7.494403839111328
Epoch 6.0: Training Loss N/A, Validation Loss 6.863409519195557
Epoch 7.0: Training Loss N/A, Validation Loss 6.3968400955200195
Epoch 8.0: Training Loss N/A, Validation Loss 6.110349178314209


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
Processing Validation Set:   0%|                                                                | 0/10 [00:00<?, ?it/s]

Original English Text: Last year I showed these two slides so that  demonstrate that the arctic ice cap,  which for most of the last three million years  has been the size of the lower 48 states,  has shrunk by 40 percent.
Reference Text: 去年我给各位展示了两个 关于北极冰帽的演示 在过去三百万年中 其面积由相当于美国南方48州面积总和 缩减了40%
Translated Text: 这是说明北极的冰盖在最近的300万年中大部分时间都是下48个州的大小,已经缩小了40%。


Processing Validation Set:  10%|█████▌                                                  | 1/10 [00:10<01:32, 10.26s/it]

Original English Text: But this understates the seriousness of this particular problem  because it doesn't show the thickness of the ice.
Reference Text: 但这些没能完全说明这个问题的严重性 因为这没有表示出冰帽的厚度
Translated Text: 但这不足以说明这个问题的严重性,因为它不显示冰的厚度。


Processing Validation Set:  20%|███████████▏                                            | 2/10 [00:16<01:03,  7.95s/it]

Original English Text: The arctic ice cap is, in a sense,  the beating heart of the global climate system.
Reference Text: 感觉上，北极冰帽 就好象全球气候系统中跳动的心脏
Translated Text: 北极冰盖是全球气候体系的心脏。


Processing Validation Set:  30%|████████████████▊                                       | 3/10 [00:20<00:43,  6.23s/it]

Original English Text: It expands in winter and contracts in summer.
Reference Text: 冬天心脏舒张，夏天心脏收缩
Translated Text: 是的,是的,是的,是的,是的,是的,是的。


Processing Validation Set:  40%|██████████████████████▍                                 | 4/10 [00:27<00:37,  6.26s/it]

Original English Text: The next slide I show you will be  a rapid fast-forward of what's happened over the last 25 years.
Reference Text: 下面我要展示的是 在过去25年里的极剧变化
Translated Text: 下面的幻灯片是过去25年发生的事的快速转动。


Processing Validation Set:  50%|████████████████████████████                            | 5/10 [00:32<00:29,  5.88s/it]

Original English Text: The permanent ice is marked in red.
Reference Text: 红色的是永冻冰
Translated Text: 永久的冰是红色的。


Processing Validation Set:  60%|█████████████████████████████████▌                      | 6/10 [00:34<00:19,  4.77s/it]

Original English Text: As you see, it expands to the dark blue --  that's the annual ice in winter,  and it contracts in summer.
Reference Text: 你看，它正在变成深蓝色 这是每年冬天形成的年度冰 在夏天永冻冰收缩
Translated Text: 正如你所看到的那样,它扩展到深蓝色,那是冬天的冰,夏天的冰收缩。


Processing Validation Set:  70%|███████████████████████████████████████▏                | 7/10 [00:42<00:16,  5.57s/it]

Original English Text: The so-called permanent ice, five years old or older,  you can see is almost like blood,  spilling out of the body here.
Reference Text: 所谓的“永冻”，是指形成五年或更久的冰 你看，这也像血液一样 输送到身体各部位
Translated Text: 这里所谓的永久性冰(五岁或更老的)几乎像血一样,从身体里流出。


Processing Validation Set:  80%|████████████████████████████████████████████▊           | 8/10 [00:49<00:12,  6.12s/it]

Original English Text: In 25 years it's gone from this, to this.
Reference Text: 在25年的时间里，它从这里，到了这里
Translated Text: 在过去的25年里,它已经从这里变为这里了。


Processing Validation Set:  90%|██████████████████████████████████████████████████▍     | 9/10 [00:54<00:05,  5.66s/it]

Original English Text: This is a problem because the warming  heats up the frozen ground around the Arctic Ocean,  where there is a massive amount of frozen carbon  which, when it thaws, is turned into methane by microbes.
Reference Text: 值得注意的是 温室效应使得北冰洋周围的冻土层受热 而这里有大量被冻封的碳 解冻时，微生物降解碳形成甲烷
Translated Text: 这是一个问题,因为加热会加热冰冻的地区,那里有大量冰冻的碳,当它融化时,由微生物变为甲烷。


Processing Validation Set: 100%|███████████████████████████████████████████████████████| 10/10 [01:05<00:00,  6.50s/it]


Validation Set METEOR score: {'meteor': 0.00847457627118644}
Validation Set BLEU score: {'bleu': 0.0, 'precisions': [0.02, 0.0, 0.0, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 2.0, 'translation_length': 50, 'reference_length': 25}
Validation Set ROUGE score: {'rouge1': 0.27999999999999997, 'rouge2': 0.06666666666666667, 'rougeL': 0.27999999999999997, 'rougeLsum': 0.27999999999999997}
calculating scores...
computing bert embedding.


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.26it/s]


computing greedy matching.


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 64.28it/s]


done in 0.17 seconds, 59.38 sentences/sec
BERTScore - Precision: 0.745878279209137, Recall: 0.7331073880195618, F1: 0.7390149831771851


In [7]:
# import spacy
# from datasets import load_dataset
# from transformers import MBartForConditionalGeneration, MBart50Tokenizer, Seq2SeqTrainer, Seq2SeqTrainingArguments, AdamW, EarlyStoppingCallback, TrainerCallback
# import torch
# import re
# import warnings
# from tqdm import tqdm
# import evaluate
# from bert_score import score as bert_score

# warnings.filterwarnings("ignore")

# # 設定設備
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# # 載入 SpaCy NER 模型
# nlp = spacy.load("en_core_web_trf")

# # 載入 IWSLT 2017 英中翻譯資料集
# dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='train[:2]', trust_remote_code=True)

# # 載入 mBART 模型和 Tokenizer
# model_name = "facebook/mbart-large-50-many-to-many-mmt"
# tokenizer = MBart50Tokenizer.from_pretrained(model_name)
# model = MBartForConditionalGeneration.from_pretrained(model_name).to(device)

# # 設定源語言和目標語言
# tokenizer.src_lang = "en_XX"
# tokenizer.tgt_lang = "zh_CN"

# # 實體識別和標記的函數
# def mark_entities(text):
#     doc = nlp(text)
#     modified_text = text
#     entities = []
#     for ent in doc.ents:
#         # 使用新的實體標記格式
#         entity_marker = f"<ENTITY type=\"{ent.label_}\">{ent.text}</ENTITY>"
#         modified_text = modified_text.replace(ent.text, entity_marker)
#         entities.append((ent.text, ent.label_))
#     return modified_text, entities

# # 預處理函數
# def preprocess_function(examples):
#     inputs, targets, entities_list = [], [], []
#     for ex in examples["translation"]:
#         marked_text, entities = mark_entities(ex["en"])
#         inputs.append(marked_text)
#         targets.append(ex["zh"])
#         entities_list.append(entities)
    
#     model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding="max_length")
#     labels = tokenizer(targets, max_length=256, truncation=True, padding="max_length")
#     model_inputs["labels"] = labels["input_ids"]
#     model_inputs["entities"] = entities_list
#     return model_inputs

# # 對數據集進行 Tokenize 和實體標記
# tokenized_dataset = dataset.map(preprocess_function, batched=True)

# # 訓練參數設置
# training_args = Seq2SeqTrainingArguments(
#     output_dir="./mbart_finetuned",
#     evaluation_strategy="epoch",
#     save_strategy="epoch",
#     learning_rate=1e-5,
#     per_device_train_batch_size=4,
#     per_device_eval_batch_size=4,
#     weight_decay=0.01,
#     save_total_limit=3,
#     num_train_epochs=1,
#     predict_with_generate=True,
#     logging_dir='./logs',
#     load_best_model_at_end=True,
# )

# # 自定義 callback 以顯示每個 epoch 的 Training 和 Validation 損失
# class LogCallback(TrainerCallback):
#     def on_epoch_end(self, args, state, control, **kwargs):
#         if state.log_history:
#             last_log = state.log_history[-1]
#             training_loss = last_log.get("loss", "N/A")
#             eval_loss = last_log.get("eval_loss", "N/A")
#             print(f"Epoch {state.epoch}: Training Loss {training_loss}, Validation Loss {eval_loss}")
#         else:
#             print(f"Epoch {state.epoch}: No log history available for this epoch.")

# # 將數據集劃分為訓練集和驗證集
# train_test_split = tokenized_dataset.train_test_split(test_size=0.1)
# train_dataset = train_test_split["train"]
# eval_dataset = train_test_split["test"]

# # 自定義 Layer-wise Learning Rate Decay
# layer_decay = 0.8
# optimizer_grouped_parameters = []
# num_layers = len(model.model.encoder.layers)
# for i, layer in enumerate(model.model.encoder.layers):
#     lr = training_args.learning_rate * (layer_decay ** (num_layers - i - 1))
#     optimizer_grouped_parameters.append({"params": layer.parameters(), "lr": lr})
# optimizer_grouped_parameters.append({"params": model.model.shared.parameters(), "lr": training_args.learning_rate})

# # 初始化 optimizer
# optimizer = AdamW(optimizer_grouped_parameters, lr=training_args.learning_rate)

# # 自定義 Trainer
# trainer = Seq2SeqTrainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset,
#     tokenizer=tokenizer,
#     optimizers=(optimizer, None),
#     callbacks=[EarlyStoppingCallback(early_stopping_patience=3), LogCallback()],
# )

# # 開始訓練
# trainer.train()

# # 後處理實體標記
# def postprocess_translation(translated_text, entities):
#     for ent_text, ent_label in entities:
#         entity_marker_pattern = re.escape(f"<ENTITY type=\"{ent_label}\">") + r"(.*?)</ENTITY>"
#         translated_text = re.sub(entity_marker_pattern, ent_text, translated_text, count=1)
#     return translated_text

# # 使用訓練好的模型進行翻譯並還原實體
# def entity_aware_translate(input_text):
#     marked_text, entities = mark_entities(input_text)
#     inputs = tokenizer(marked_text, return_tensors="pt").to(device)
#     translated_tokens = model.generate(
#         inputs["input_ids"],
#         max_length=256,
#         forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"]
#     )
#     translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
#     return postprocess_translation(translated_text, entities)

# # 驗證指標設置
# meteor_metric = evaluate.load("meteor")
# bleu_metric = evaluate.load("bleu")
# rouge_metric = evaluate.load("rouge")

# # 評估函數
# def evaluate_model(val_dataset):
#     val_predictions, val_references = [], []
#     for example in tqdm(val_dataset, desc="Processing Validation Set"):
#         input_text = example["translation"]["en"]
#         reference_text = example["translation"]["zh"]

#         final_translation = entity_aware_translate(input_text)
#         val_predictions.append(final_translation)
#         val_references.append([reference_text])

#         print("Original English Text:", input_text)
#         print("Reference Text:", reference_text)
#         print("Translated Text:", final_translation)
#         print("=" * 50)

#     # 計算各項評分
#     val_meteor_score = meteor_metric.compute(predictions=val_predictions, references=val_references)
#     val_bleu_score = bleu_metric.compute(predictions=val_predictions, references=val_references)
#     val_rouge_score = rouge_metric.compute(predictions=val_predictions, references=val_references)

#     print("Validation Set METEOR score:", val_meteor_score)
#     print("Validation Set BLEU score:", val_bleu_score)
#     print("Validation Set ROUGE score:", val_rouge_score)

#     # BERTScore 計算
#     P, R, F1 = bert_score(val_predictions, [ref[0] for ref in val_references], lang="zh", verbose=True)
#     print(f"BERTScore - Precision: {P.mean().item()}, Recall: {R.mean().item()}, F1: {F1.mean().item()}")

# # 執行評估
# val_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='validation[:10]', trust_remote_code=True)
# evaluate_model(val_dataset)


# Pre Train

In [3]:
import spacy
from datasets import load_dataset
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
import torch
from tqdm import tqdm
import evaluate
from bert_score import score as bert_score

# 設定設備
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 載入 SpaCy NER 模型
nlp = spacy.load("en_core_web_trf")

# 載入 IWSLT 2017 英中翻譯資料集
dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='validation[:10]', trust_remote_code=True)

# 載入預訓練的 mBART 模型和 Tokenizer
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = MBart50Tokenizer.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name).to(device)

# 設定源語言和目標語言
tokenizer.src_lang = "en_XX"  # 英文
tokenizer.tgt_lang = "zh_CN"  # 簡體中文

# 使用預訓練模型進行翻譯
def translate_text(input_text):
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    translated_tokens = model.generate(
        inputs["input_ids"],
        max_length=256,
        num_beams=5,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"]  # 強制翻譯成中文
    )
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    return translated_text

# 評估模型的表現
def evaluate_model(dataset):
    val_predictions = []
    val_references = []
    
    for example in tqdm(dataset, desc="Processing Validation Set"):
        input_text = example["translation"]["en"]
        reference_text = example["translation"]["zh"]
        
        # 進行翻譯
        final_translation = translate_text(input_text)
        val_predictions.append(final_translation)
        val_references.append([reference_text])
        
        # 顯示翻譯結果
        print("Original English Text:", input_text)
        print("Reference Text:", reference_text)
        print("Translated Text:", final_translation)
        print("=" * 50)

    # 計算 METEOR, BLEU 和 ROUGE 分數
    meteor_metric = evaluate.load("meteor")
    bleu_metric = evaluate.load("bleu")
    rouge_metric = evaluate.load("rouge")

    val_meteor_score = meteor_metric.compute(predictions=val_predictions, references=val_references)
    val_bleu_score = bleu_metric.compute(predictions=val_predictions, references=val_references)
    val_rouge_score = rouge_metric.compute(predictions=val_predictions, references=val_references)
    
    print("Validation Set METEOR score:", val_meteor_score)
    print("Validation Set BLEU score:", val_bleu_score)
    print("Validation Set ROUGE score:", val_rouge_score)

    # 計算 BERTScore
    P, R, F1 = bert_score(val_predictions, [ref[0] for ref in val_references], lang="zh", verbose=True)
    print(f"BERTScore - Precision: {P.mean().item()}, Recall: {R.mean().item()}, F1: {F1.mean().item()}")

# 執行評估
evaluate_model(dataset)


Processing Validation Set:  10%|█████▌                                                  | 1/10 [00:01<00:10,  1.20s/it]

Original English Text: Last year I showed these two slides so that  demonstrate that the arctic ice cap,  which for most of the last three million years  has been the size of the lower 48 states,  has shrunk by 40 percent.
Reference Text: 去年我给各位展示了两个 关于北极冰帽的演示 在过去三百万年中 其面积由相当于美国南方48州面积总和 缩减了40%
Translated Text: 去年我展示了这两张幻灯片,以证明北极冰盖,在过去的3百万年中大部分时间都是48个较低的州的大小,已经缩小了40%。


Processing Validation Set:  20%|███████████▏                                            | 2/10 [00:01<00:06,  1.24it/s]

Original English Text: But this understates the seriousness of this particular problem  because it doesn't show the thickness of the ice.
Reference Text: 但这些没能完全说明这个问题的严重性 因为这没有表示出冰帽的厚度
Translated Text: 但这不足以说明该问题的严重性,因为它没有显示冰的厚度。


Processing Validation Set:  30%|████████████████▊                                       | 3/10 [00:02<00:04,  1.48it/s]

Original English Text: The arctic ice cap is, in a sense,  the beating heart of the global climate system.
Reference Text: 感觉上，北极冰帽 就好象全球气候系统中跳动的心脏
Translated Text: 北极冰盖在某种意义上是全球气候体系的心脏。


Processing Validation Set:  40%|██████████████████████▍                                 | 4/10 [00:02<00:03,  1.85it/s]

Original English Text: It expands in winter and contracts in summer.
Reference Text: 冬天心脏舒张，夏天心脏收缩
Translated Text: 它在冬天扩张,在夏天收缩。


Processing Validation Set:  50%|████████████████████████████                            | 5/10 [00:03<00:02,  1.90it/s]

Original English Text: The next slide I show you will be  a rapid fast-forward of what's happened over the last 25 years.
Reference Text: 下面我要展示的是 在过去25年里的极剧变化
Translated Text: 下面的幻灯片将是过去25年发生的快速转动。


Processing Validation Set:  60%|█████████████████████████████████▌                      | 6/10 [00:03<00:01,  2.22it/s]

Original English Text: The permanent ice is marked in red.
Reference Text: 红色的是永冻冰
Translated Text: 永久性冰被标记为红色。


Processing Validation Set:  70%|███████████████████████████████████████▏                | 7/10 [00:04<00:01,  1.88it/s]

Original English Text: As you see, it expands to the dark blue --  that's the annual ice in winter,  and it contracts in summer.
Reference Text: 你看，它正在变成深蓝色 这是每年冬天形成的年度冰 在夏天永冻冰收缩
Translated Text: 正如你看到的,它扩展到深蓝色,那是每年冬天的冰,而夏天它收缩。


Processing Validation Set:  80%|████████████████████████████████████████████▊           | 8/10 [00:04<00:01,  1.71it/s]

Original English Text: The so-called permanent ice, five years old or older,  you can see is almost like blood,  spilling out of the body here.
Reference Text: 所谓的“永冻”，是指形成五年或更久的冰 你看，这也像血液一样 输送到身体各部位
Translated Text: 所谓的永久性冰,五岁或以上,你可以看到几乎像血一样,从身体里流出。


Processing Validation Set:  90%|██████████████████████████████████████████████████▍     | 9/10 [00:05<00:00,  1.87it/s]

Original English Text: In 25 years it's gone from this, to this.
Reference Text: 在25年的时间里，它从这里，到了这里
Translated Text: 25年后,它从这里变为这里。


Processing Validation Set: 100%|███████████████████████████████████████████████████████| 10/10 [00:06<00:00,  1.59it/s]

Original English Text: This is a problem because the warming  heats up the frozen ground around the Arctic Ocean,  where there is a massive amount of frozen carbon  which, when it thaws, is turned into methane by microbes.
Reference Text: 值得注意的是 温室效应使得北冰洋周围的冻土层受热 而这里有大量被冻封的碳 解冻时，微生物降解碳形成甲烷
Translated Text: 这是一个问题,因为加热加热了北极大洋周围的冻土,那里有大量的冰冻碳,当它融化时,由微生物转化为甲烷。



[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Validation Set METEOR score: {'meteor': 0.007936507936507938}
Validation Set BLEU score: {'bleu': 0.0, 'precisions': [0.022727272727272728, 0.0, 0.0, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 1.76, 'translation_length': 44, 'reference_length': 25}
Validation Set ROUGE score: {'rouge1': 0.27999999999999997, 'rouge2': 0.06666666666666667, 'rougeL': 0.27999999999999997, 'rougeLsum': 0.27999999999999997}
calculating scores...
computing bert embedding.


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.22it/s]


computing greedy matching.


100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 124.94it/s]

done in 0.10 seconds, 96.23 sentences/sec
BERTScore - Precision: 0.766173243522644, Recall: 0.7631586790084839, F1: 0.76434725522995



