In [None]:
import torch
from transformers import MBartForConditionalGeneration, MBart50Tokenizer, Seq2SeqTrainer, Seq2SeqTrainingArguments
from datasets import load_dataset

# 加載 IWSLT 2017 英中翻譯資料集
dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='train[:2000]', trust_remote_code=True)

# 使用自定義配置初始化 tokenizer 和模型
model_name = "facebook/mbart-large-50-many-to-many-mmt"

# 初始化 Tokenizer
tokenizer = MBart50Tokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"additional_special_tokens": ["<s>", "</s>", "<mask>", "<pad>", "<unk>"]})

# 設定 src_lang 和 tgt_lang
tokenizer.src_lang = "en_XX"  # 英文
tokenizer.tgt_lang = "zh_CN"  # 簡體中文

# 初始化模型
model = MBartForConditionalGeneration.from_pretrained(model_name)
model.config.max_length = 200
model.config.num_beams = 5
model.config.pad_token_id = tokenizer.pad_token_id
model.config.bos_token_id = tokenizer.bos_token_id
model.config.eos_token_id = tokenizer.eos_token_id

# 配置訓練數據
def preprocess_function(examples):
    inputs = [ex["en"] for ex in examples["translation"]]
    targets = [ex["zh"] for ex in examples["translation"]]
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize 數據集
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# 設定訓練參數
training_args = Seq2SeqTrainingArguments(
    output_dir="./mbart_finetuned",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    predict_with_generate=True,
    logging_dir='./logs',
)

# 劃分訓練和驗證數據
train_size = 0.9
train_test_split = tokenized_dataset.train_test_split(test_size=1-train_size)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]

# 初始化 Trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

# 開始訓練
trainer.train()

# 評估模型
results = trainer.evaluate()
print("Evaluation results:", results)

# 保存模型和 Tokenizer
trainer.save_model("./mbart_finetuned")
tokenizer.save_pretrained("./mbart_finetuned")


In [1]:
from datasets import load_dataset
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
import torch
import evaluate  # 使用 evaluate 庫來加載指標
from tqdm import tqdm  # 加入進度條

# 加載 IWSLT 2017 英中翻譯資料集的驗證和測試集
# val_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='validation', trust_remote_code=True)
test_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='test', trust_remote_code=True)

# 加載微調後的模型和 tokenizer
model = MBartForConditionalGeneration.from_pretrained("./mbart_finetuned")
tokenizer = MBart50Tokenizer.from_pretrained("./mbart_finetuned")

# 設置源語言和目標語言
tokenizer.src_lang = "en_XX"
model.config.forced_bos_token_id = tokenizer.lang_code_to_id["zh_CN"]

# BLEU 評估指標
bleu_metric = evaluate.load("bleu")

# 翻譯函數
def translate(text):
    inputs = tokenizer(text, return_tensors="pt")
    translated_tokens = model.generate(
        inputs["input_ids"],
        max_length=128,
        forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"]
    )
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    return translated_text

# 計算 BLEU 分數的函數
def compute_bleu(predictions, references):
    bleu_metric.add_batch(predictions=predictions, references=references)
    result = bleu_metric.compute()
    return result

# 在驗證集上進行翻譯並計算 BLEU 分數
print("Evaluating on Validation Set...")
val_predictions = []
val_references = []
for example in tqdm(val_dataset, desc="Processing Validation Set"):  # 加入進度條
    input_text = example["translation"]["en"]
    reference_text = example["translation"]["zh"]
    translated_text = translate(input_text)
    val_predictions.append(translated_text)
    val_references.append([reference_text])  # BLEU 要求 reference 為列表

val_bleu_score = compute_bleu(val_predictions, val_references)
print("Validation Set BLEU score:", val_bleu_score)

# 在測試集上進行翻譯並計算 BLEU 分數
print("Evaluating on Test Set...")
test_predictions = []
test_references = []
for example in tqdm(test_dataset, desc="Processing Test Set"):  # 加入進度條
    input_text = example["translation"]["en"]
    reference_text = example["translation"]["zh"]
    translated_text = translate(input_text)
    test_predictions.append(translated_text)
    test_references.append([reference_text])  # BLEU 要求 reference 為列表

test_bleu_score = compute_bleu(test_predictions, test_references)
print("Test Set BLEU score:", test_bleu_score)


  from .autonotebook import tqdm as notebook_tqdm


Evaluating on Validation Set...


Processing Validation Set: 100%|█████████████████████████████████████████████████████| 879/879 [28:36<00:00,  1.95s/it]


Validation Set BLEU score: {'bleu': 0.0, 'precisions': [0.01509009009009009, 0.002246559955068801, 0.0007062146892655367, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 1.7542473330699329, 'translation_length': 4440, 'reference_length': 2531}
Evaluating on Test Set...


Processing Test Set: 100%|███████████████████████████████████████████████████████| 8549/8549 [3:59:36<00:00,  1.68s/it]


Test Set BLEU score: {'bleu': 0.001762937007613813, 'precisions': [0.014441944429909476, 0.0021231422505307855, 0.0009088941787491885, 0.00034660042747386053], 'brevity_penalty': 1.0, 'length_ratio': 1.7534636205156435, 'translation_length': 38222, 'reference_length': 21798}


In [2]:
from datasets import load_dataset
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
import spacy
import torch
import evaluate
from tqdm import tqdm

# 加載 IWSLT 2017 英中翻譯資料集的驗證和測試集
val_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='validation', trust_remote_code=True)
test_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='test', trust_remote_code=True)

# 加載微調後的模型和 tokenizer
model = MBartForConditionalGeneration.from_pretrained("./mbart_finetuned")
tokenizer = MBart50Tokenizer.from_pretrained("./mbart_finetuned")

# 設置源語言和目標語言
tokenizer.src_lang = "en_XX"
model.config.forced_bos_token_id = tokenizer.lang_code_to_id["zh_CN"]

# 加載 SpaCy 英文 NER 模型
nlp = spacy.load("en_core_web_sm")

# BLEU 評估指標
bleu_metric = evaluate.load("bleu")

# 步驟 1：實體標記和替換
def mark_entities(text):
    doc = nlp(text)
    modified_text = text
    entities = []
    for ent in doc.ents:
        entity_marker = f"<{ent.label_}:{ent.text}>"
        modified_text = modified_text.replace(ent.text, entity_marker)
        entities.append((ent.text, ent.label_))
    return modified_text, entities

# 步驟 2：翻譯帶有實體標記的句子
def translate_with_entities(text):
    marked_text, entities = mark_entities(text)
    inputs = tokenizer(marked_text, return_tensors="pt")
    translated_tokens = model.generate(
        inputs["input_ids"],
        max_length=80,
        length_penalty=0.8,
        num_beams=5,
        forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"]
    )
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    return translated_text, entities

# 步驟 3：後處理還原實體
def postprocess_translation(translated_text, entities):
    for ent_text, ent_label in entities:
        entity_marker = f"<{ent_label}:{ent_text}>"
        translated_text = translated_text.replace(entity_marker, ent_text)
    return translated_text

# 完整的 Entity-Aware 翻譯函數
def entity_aware_translate(text):
    translated_text, entities = translate_with_entities(text)
    final_translation = postprocess_translation(translated_text, entities)
    return final_translation

# 評估 BLEU 分數
def compute_bleu(predictions, references):
    bleu_metric.add_batch(predictions=predictions, references=references)
    result = bleu_metric.compute()
    return result

# 在驗證集上進行翻譯並計算 BLEU 分數
print("Evaluating on Validation Set...")
val_predictions = []
val_references = []
for example in tqdm(val_dataset, desc="Processing Validation Set"):
    input_text = example["translation"]["en"]
    reference_text = example["translation"]["zh"]
    final_translation = entity_aware_translate(input_text)
    val_predictions.append(final_translation)
    val_references.append([reference_text])  # BLEU 要求 reference 為列表

val_bleu_score = compute_bleu(val_predictions, val_references)
print("Validation Set BLEU score:", val_bleu_score)

# 在測試集上進行翻譯並計算 BLEU 分數
print("Evaluating on Test Set...")
test_predictions = []
test_references = []
for example in tqdm(test_dataset, desc="Processing Test Set"):
    input_text = example["translation"]["en"]
    reference_text = example["translation"]["zh"]
    final_translation = entity_aware_translate(input_text)
    test_predictions.append(final_translation)
    test_references.append([reference_text])  # BLEU 要求 reference 為列表

test_bleu_score = compute_bleu(test_predictions, test_references)
print("Test Set BLEU score:", test_bleu_score)


Evaluating on Validation Set...


Processing Validation Set: 100%|███████████████████████████████████████████████████| 879/879 [1:02:05<00:00,  4.24s/it]


Validation Set BLEU score: {'bleu': 0.0, 'precisions': [0.013434579439252336, 0.0009396288466055908, 0.00028376844494892167, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 2.0292374555511654, 'translation_length': 5136, 'reference_length': 2531}
Evaluating on Test Set...


Processing Test Set: 100%|███████████████████████████████████████████████████████| 8549/8549 [9:26:22<00:00,  3.98s/it]


Test Set BLEU score: {'bleu': 0.0013290952461597158, 'precisions': [0.01223358716712477, 0.0018137678935178725, 0.0006515551592880903, 0.0002158428663932657], 'brevity_penalty': 1.0, 'length_ratio': 2.0362418570511056, 'translation_length': 44386, 'reference_length': 21798}


In [3]:
from datasets import load_dataset
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
import spacy
import torch
import evaluate
from tqdm import tqdm

# 加載 IWSLT 2017 英中翻譯資料集的驗證和測試集
val_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='validation', trust_remote_code=True)
test_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='test', trust_remote_code=True)

# 加載微調後的模型和 tokenizer
model = MBartForConditionalGeneration.from_pretrained("./mbart_finetuned")
tokenizer = MBart50Tokenizer.from_pretrained("./mbart_finetuned")

# 設置源語言和目標語言
tokenizer.src_lang = "en_XX"
model.config.forced_bos_token_id = tokenizer.lang_code_to_id["zh_CN"]

# 加載 SpaCy 英文 NER 模型
nlp = spacy.load("en_core_web_sm")

# METEOR 評估指標
meteor_metric = evaluate.load("meteor")

# 步驟 1：實體標記和替換
def mark_entities(text):
    doc = nlp(text)
    modified_text = text
    entities = []
    for ent in doc.ents:
        entity_marker = f"<{ent.label_}:{ent.text}>"
        modified_text = modified_text.replace(ent.text, entity_marker)
        entities.append((ent.text, ent.label_))
    return modified_text, entities

# 步驟 2：翻譯帶有實體標記的句子
def translate_with_entities(text):
    marked_text, entities = mark_entities(text)
    inputs = tokenizer(marked_text, return_tensors="pt")
    translated_tokens = model.generate(
        inputs["input_ids"],
        max_length=80,
        length_penalty=0.8,
        num_beams=5,
        forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"]
    )
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    return translated_text, entities

# 步驟 3：後處理還原實體
def postprocess_translation(translated_text, entities):
    for ent_text, ent_label in entities:
        entity_marker = f"<{ent_label}:{ent_text}>"
        translated_text = translated_text.replace(entity_marker, ent_text)
    return translated_text

# 完整的 Entity-Aware 翻譯函數
def entity_aware_translate(text):
    translated_text, entities = translate_with_entities(text)
    final_translation = postprocess_translation(translated_text, entities)
    return final_translation

# 評估 METEOR 分數
def compute_meteor(predictions, references):
    meteor_metric.add_batch(predictions=predictions, references=references)
    result = meteor_metric.compute()
    return result

# 在驗證集上進行翻譯並計算 METEOR 分數
print("Evaluating on Validation Set...")
val_predictions = []
val_references = []
for example in tqdm(val_dataset, desc="Processing Validation Set"):
    input_text = example["translation"]["en"]
    reference_text = example["translation"]["zh"]
    final_translation = entity_aware_translate(input_text)
    val_predictions.append(final_translation)
    val_references.append([reference_text])  # METEOR 要求 reference 為列表

val_meteor_score = compute_meteor(val_predictions, val_references)
print("Validation Set METEOR score:", val_meteor_score)

# 在測試集上進行翻譯並計算 METEOR 分數
print("Evaluating on Test Set...")
test_predictions = []
test_references = []
for example in tqdm(test_dataset, desc="Processing Test Set"):
    input_text = example["translation"]["en"]
    reference_text = example["translation"]["zh"]
    final_translation = entity_aware_translate(input_text)
    test_predictions.append(final_translation)
    test_references.append([reference_text])  # METEOR 要求 reference 為列表

test_meteor_score = compute_meteor(test_predictions, test_references)
print("Test Set METEOR score:", test_meteor_score)


Downloading builder script: 100%|█████████████████████████████████████████████████████████| 7.02k/7.02k [00:00<?, ?B/s]
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Evaluating on Validation Set...


Processing Validation Set: 100%|███████████████████████████████████████████████████| 879/879 [1:53:55<00:00,  7.78s/it]


Validation Set METEOR score: {'meteor': 0.015482150565798002}
Evaluating on Test Set...


Processing Test Set: 100%|███████████████████████████████████████████████████████| 8549/8549 [9:10:20<00:00,  3.86s/it]


Test Set METEOR score: {'meteor': 0.022297144359408206}


In [None]:
import optuna

def objective(trial):
    # 在指定範圍內選擇超參數
    max_length = trial.suggest_int("max_length", 50, 100)
    length_penalty = trial.suggest_float("length_penalty", 0.7, 1.0)
    num_beams = trial.suggest_int("num_beams", 3, 7)
    
    # 運行翻譯並評估
    translated_texts = []
    for example in val_dataset:
        input_text = example["translation"]["en"]
        translated_text, _ = translate_with_entities(
            input_text,
            max_length=max_length,
            length_penalty=length_penalty,
            num_beams=num_beams
        )
        translated_texts.append(translated_text)

    # 計算 METEOR 分數
    meteor_score = compute_meteor(translated_texts, [ex["translation"]["zh"] for ex in val_dataset])
    return meteor_score

# 使用 Optuna 優化
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

# 獲取最佳參數
print("Best Parameters:", study.best_params)
print("Best METEOR Score:", study.best_value)


In [3]:
from datasets import load_dataset
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
import spacy
import torch
import evaluate
from tqdm import tqdm
import re

# 加載 IWSLT 2017 英中翻譯資料集的驗證和測試集
val_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='validation[:10]', trust_remote_code=True)
# test_dataset = load_dataset('iwslt2017', 'iwslt2017-en-zh', split='test[:10]', trust_remote_code=True)

# 加載微調後的模型和 tokenizer
model = MBartForConditionalGeneration.from_pretrained("./mbart_finetuned")
tokenizer = MBart50Tokenizer.from_pretrained("./mbart_finetuned")
# model_name = "facebook/mbart-large-50-many-to-many-mmt"
# tokenizer = MBart50Tokenizer.from_pretrained(model_name)
# model = MBartForConditionalGeneration.from_pretrained(model_name)

# 設置源語言和目標語言
tokenizer.src_lang = "en_XX"
model.config.forced_bos_token_id = tokenizer.lang_code_to_id["zh_CN"]

# 加載 SpaCy 英文 NER 模型
nlp = spacy.load("en_core_web_sm")

# METEOR 評估指標
meteor_metric = evaluate.load("meteor")

# 步驟 1：實體標記和替換
def mark_entities(text):
    doc = nlp(text)
    modified_text = text
    entities = []
    for ent in doc.ents:
        entity_marker = f"<<{ent.label_}:{ent.text}>>"
        modified_text = modified_text.replace(ent.text, entity_marker)
        entities.append((ent.text, ent.label_))
    return modified_text, entities


# 步驟 2：翻譯帶有實體標記的句子，加入打印語句
def translate_with_entities(text, max_length=80, length_penalty=1.2, num_beams=5):
    marked_text, entities = mark_entities(text)
    print("Original Text:", text)
    print("")
    print("Marked Text:", marked_text)
    print("Entities:", entities)
    print("")
    inputs = tokenizer(marked_text, return_tensors="pt")
    translated_tokens = model.generate(
        inputs["input_ids"],
        max_length=max_length,
        length_penalty=length_penalty,
        num_beams=num_beams,
        forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"]
    )
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    print("Translated Text:", translated_text)
    # print("="*50)  # 分隔線便於區分不同句子的輸出

    return translated_text, entities

# 步驟 3：後處理還原實體
def postprocess_translation(translated_text, entities):
    for ent_text, ent_label in entities:
        # 使用正則表達式來匹配標記，無論模型是否對標記進行了部分修改
        entity_marker_pattern = re.escape(f"<<{ent_label}:") + r"(.*?)>>"
        translated_text = re.sub(entity_marker_pattern, ent_text, translated_text)
    return translated_text


# 完整的 Entity-Aware 翻譯函數
def entity_aware_translate(text):
    translated_text, entities = translate_with_entities(text)
    final_translation = postprocess_translation(translated_text, entities)
    return final_translation


# 評估 METEOR 分數
def compute_meteor(predictions, references):
    meteor_metric.add_batch(predictions=predictions, references=references)
    result = meteor_metric.compute()
    return result

# 在驗證集上進行翻譯並計算 METEOR 分數
print("Evaluating on Validation Set...")
val_predictions = []
val_references = []
for example in tqdm(val_dataset, desc="Processing Validation Set"):
    input_text = example["translation"]["en"]
    reference_text = example["translation"]["zh"]
    final_translation = entity_aware_translate(input_text)
    print("Reference Text:", reference_text)
    print("Translated Text:", final_translation)
    print("=" * 50)
    val_predictions.append(final_translation)
    val_references.append([reference_text])  # METEOR 要求 reference 為列表

val_meteor_score = compute_meteor(val_predictions, val_references)
print("Validation Set METEOR score:", val_meteor_score)

print("====================================================================")
# # 在測試集上進行翻譯並計算 METEOR 分數
# print("Evaluating on Test Set...")
# test_predictions = []
# test_references = []
# for example in tqdm(test_dataset, desc="Processing Test Set"):
#     input_text = example["translation"]["en"]
#     reference_text = example["translation"]["zh"]
#     final_translation = entity_aware_translate(input_text)
#     test_predictions.append(final_translation)
#     test_references.append([reference_text])  # METEOR 要求 reference 為列表

# test_meteor_score = compute_meteor(test_predictions, test_references)
# print("Test Set METEOR score:", test_meteor_score)


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Evaluating on Validation Set...


Processing Validation Set:   0%|                                                                | 0/10 [00:00<?, ?it/s]

Original Text: Last year I showed these two slides so that  demonstrate that the arctic ice cap,  which for most of the last three million years  has been the size of the lower 48 states,  has shrunk by 40 percent.

Marked Text: <<DATE:Last year>> I showed these <<CARDINAL:two>> slides so that  demonstrate that <<LOC:the arctic ice cap>>,  which for most of <<DATE:the last three million years>>  has been the size of the lower <<CARDINAL:48>> states,  has shrunk by <<PERCENT:40 percent>>.
Entities: [('Last year', 'DATE'), ('two', 'CARDINAL'), ('the arctic ice cap', 'LOC'), ('the last three million years', 'DATE'), ('48', 'CARDINAL'), ('40 percent', 'PERCENT')]



Processing Validation Set:  10%|█████▌                                                  | 1/10 [00:10<01:32, 10.27s/it]

Translated Text: 去年我展示了这些 slides,以证明,北极冰盖(LOC:the arctic ice cap),在过去的3百万年中大部分时间都是低级州的大小,已经减少了40%。
Reference Text: 去年我给各位展示了两个 关于北极冰帽的演示 在过去三百万年中 其面积由相当于美国南方48州面积总和 缩减了40%
Translated Text: 去年我展示了这些 slides,以证明,北极冰盖(LOC:the arctic ice cap),在过去的3百万年中大部分时间都是低级州的大小,已经减少了40%。
Original Text: But this understates the seriousness of this particular problem  because it doesn't show the thickness of the ice.

Marked Text: But this understates the seriousness of this particular problem  because it doesn't show the thickness of the ice.
Entities: []



Processing Validation Set:  20%|███████████▏                                            | 2/10 [00:13<00:47,  5.92s/it]

Translated Text: 但这不足以说明该问题的严重性,因为它没有显示冰的厚度。
Reference Text: 但这些没能完全说明这个问题的严重性 因为这没有表示出冰帽的厚度
Translated Text: 但这不足以说明该问题的严重性,因为它没有显示冰的厚度。
Original Text: The arctic ice cap is, in a sense,  the beating heart of the global climate system.

Marked Text: The arctic ice cap is, in a sense,  the beating heart of the global climate system.
Entities: []



Processing Validation Set:  30%|████████████████▊                                       | 3/10 [00:15<00:31,  4.47s/it]

Translated Text: 北极冰盖在某种意义上是全球气候体系的摇动之心。
Reference Text: 感觉上，北极冰帽 就好象全球气候系统中跳动的心脏
Translated Text: 北极冰盖在某种意义上是全球气候体系的摇动之心。
Original Text: It expands in winter and contracts in summer.

Marked Text: It expands in <<DATE:winter>> and contracts in <<DATE:summer>>.
Entities: [('winter', 'DATE'), ('summer', 'DATE')]



Processing Validation Set:  40%|██████████████████████▍                                 | 4/10 [00:19<00:24,  4.15s/it]

Translated Text: 它在“DATE:winter”中扩展,在“DATE:summer”中收缩。
Reference Text: 冬天心脏舒张，夏天心脏收缩
Translated Text: 它在“DATE:winter”中扩展,在“DATE:summer”中收缩。
Original Text: The next slide I show you will be  a rapid fast-forward of what's happened over the last 25 years.

Marked Text: The next slide I show you will be  a rapid fast-forward of what's happened over <<DATE:the last 25 years>>.
Entities: [('the last 25 years', 'DATE')]



Processing Validation Set:  50%|████████████████████████████                            | 5/10 [00:22<00:19,  3.86s/it]

Translated Text: 下面的幻灯片将是一个快速的快速的向前发展,发生在“日期:过去25年”。
Reference Text: 下面我要展示的是 在过去25年里的极剧变化
Translated Text: 下面的幻灯片将是一个快速的快速的向前发展,发生在“日期:过去25年”。
Original Text: The permanent ice is marked in red.

Marked Text: The permanent ice is marked in red.
Entities: []



Processing Validation Set:  60%|█████████████████████████████████▌                      | 6/10 [00:24<00:12,  3.12s/it]

Translated Text: 永久性冰被标记为红色。
Reference Text: 红色的是永冻冰
Translated Text: 永久性冰被标记为红色。
Original Text: As you see, it expands to the dark blue --  that's the annual ice in winter,  and it contracts in summer.

Marked Text: As you see, it expands to the dark blue --  that's the annual ice in <<DATE:winter>>,  and it contracts in <<DATE:summer>>.
Entities: [('winter', 'DATE'), ('summer', 'DATE')]



Processing Validation Set:  70%|███████████████████████████████████████▏                | 7/10 [00:30<00:11,  3.96s/it]

Translated Text: 正如你所看到的,它扩展到深蓝色,那是每年的冰在“DATE:冬天”中,它在“DATE:夏天”中收缩。
Reference Text: 你看，它正在变成深蓝色 这是每年冬天形成的年度冰 在夏天永冻冰收缩
Translated Text: 正如你所看到的,它扩展到深蓝色,那是每年的冰在“DATE:冬天”中,它在“DATE:夏天”中收缩。
Original Text: The so-called permanent ice, five years old or older,  you can see is almost like blood,  spilling out of the body here.

Marked Text: The so-called permanent ice, <<DATE:five years old>> or older,  you can see is almost like blood,  spilling out of the body here.
Entities: [('five years old', 'DATE')]



Processing Validation Set:  80%|████████████████████████████████████████████▊           | 8/10 [00:35<00:08,  4.41s/it]

Translated Text: 所谓的永久性冰(DATE:5岁)或更老的冰,你可以看到,几乎像血一样,从身体里流出。
Reference Text: 所谓的“永冻”，是指形成五年或更久的冰 你看，这也像血液一样 输送到身体各部位
Translated Text: 所谓的永久性冰(DATE:5岁)或更老的冰,你可以看到,几乎像血一样,从身体里流出。
Original Text: In 25 years it's gone from this, to this.

Marked Text: In <<DATE:25 years>> it's gone from this, to this.
Entities: [('25 years', 'DATE')]



Processing Validation Set:  90%|██████████████████████████████████████████████████▍     | 9/10 [00:38<00:03,  3.88s/it]

Translated Text: 在“25年”里,它已经从这儿,变成这儿了。
Reference Text: 在25年的时间里，它从这里，到了这里
Translated Text: 在“25年”里,它已经从这儿,变成这儿了。
Original Text: This is a problem because the warming  heats up the frozen ground around the Arctic Ocean,  where there is a massive amount of frozen carbon  which, when it thaws, is turned into methane by microbes.

Marked Text: This is a problem because the warming  heats up the frozen ground around <<LOC:the Arctic Ocean>>,  where there is a massive amount of frozen carbon  which, when it thaws, is turned into methane by microbes.
Entities: [('the Arctic Ocean', 'LOC')]



Processing Validation Set: 100%|███████████████████████████████████████████████████████| 10/10 [00:44<00:00,  4.49s/it]

Translated Text: 这是一个问题,因为全球变暖加热了在“北极海洋”周围的冻土,那里有大量冰冻的碳,当它融化时,由微生物转化为甲烷。
Reference Text: 值得注意的是 温室效应使得北冰洋周围的冻土层受热 而这里有大量被冻封的碳 解冻时，微生物降解碳形成甲烷
Translated Text: 这是一个问题,因为全球变暖加热了在“北极海洋”周围的冻土,那里有大量冰冻的碳,当它融化时,由微生物转化为甲烷。
Validation Set METEOR score: {'meteor': 0.006756756756756757}



