In [1]:
import json
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset, Dataset
import os
import torch.nn.functional as F
import torch
from tqdm import tqdm
import evaluate

### Preprocessing

In [2]:
def load_preprocess_train(data_path,only_eamt=False):
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    formatted_data = []
    for sample in data:
        source_text = sample["source"]
        target_text = sample["target"]
        entities = sample.get("enriched_entities", [])

        entity_annotations = [f"{ent['entity_name']['en']}" for ent in entities]
        entity_text = ", ".join(entity_annotations) if entity_annotations else "None"

        if only_eamt != True:
            formatted_data.append({
                    "task": "NER",
                    "input": f"Recognize entities: {source_text}",
                    "output": entity_text
                })
        formatted_data.append({
            "task": "Entity-aware MT",
            "input": f"Entity translate (EN→FR): {source_text}",
            "output": target_text
        })
    return Dataset.from_list(formatted_data)

In [3]:
def load_preprocess_test(data_path):
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    formatted_data = []
    for sample in data:
        source_text = sample["source"]
        target_text = sample["target"]
        formatted_data.append({
            "task": "Entity-aware MT",
            "input": f"Entity translate (EN→FR): {source_text}",
            "output": target_text
        })
    return formatted_data

In [13]:
data_path_test = os.path.join(os.getcwd(),r"data\test_data.json")
load_preprocess_test(data_path_test)

[{'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): How many states touch Lake Michigan?',
  'output': 'Combien d’États touchent le lac Michigan ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Which country has a larger population, Canada or China?',
  'output': 'Quel est le pays le plus peuplé, le Canada ou la Chine ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Who directed the movie that is based on the second book of The Lord of the Rings series?',
  'output': 'Qui a réalisé le film inspiré du deuxième livre de la série Le Seigneur des anneaux ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Which country did not ratify the Treaty of Versailles?',
  'output': 'Quel pays n’a pas ratifié le traité de Versailles ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Who wrote Neuromancer?',
  'output': 'Qui a écrit le livre Neuromancien ?'},
 {'task': 'Entity-aware MT',
  'input': '

### Loading Model 

In [185]:
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

In [186]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)

cuda


### Tokenization

In [14]:
def preprocess_function(samples):
    # Tokenize inputs and targets
    inputs = tokenizer(samples["input"], padding="max_length", truncation=True, max_length=128)
    targets = tokenizer(samples["output"], padding="max_length", truncation=True, max_length=128)
    inputs["labels"] = targets["input_ids"]
    inputs = {key: torch.tensor(value).to(device) for key, value in inputs.items()}
    return inputs

In [15]:
data_path = os.path.join(os.getcwd(),r"data\train_data.json")
train_data = load_preprocess_train(data_path)

In [16]:
tokenized_dataset = train_data.map(preprocess_function, batched=True)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

Map:   0%|          | 0/7742 [00:00<?, ? examples/s]

NameError: name 'device' is not defined

### Custom Loss function (Prioritizing Translation over NER)

In [150]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Custom loss function to prioritize translation over NER."""
        labels = inputs.pop("labels")  # Extract target labels
        outputs = model(**inputs)
        logits = outputs.logits  # Get logits
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1), ignore_index=-100)
        ner_weight = 0.4
        translation_weight = 0.6
        task_type = inputs.get("task_type", ["Translation"] * logits.shape[0])
        task_weights = torch.tensor(
            [ner_weight if "NER" in task else translation_weight for task in task_type],
            device=logits.device,
            dtype=torch.float,
        )
        weighted_loss = loss * task_weights.mean()
        return (weighted_loss, outputs) if return_outputs else weighted_loss

## Model Training

In [151]:
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=50,
    weight_decay=0.01,
    save_total_limit=2,
    push_to_hub=False,
)
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()


  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss
1,0.4139,0.03593
2,0.0456,0.022345
3,0.0327,0.015476
4,0.0256,0.010815
5,0.0195,0.008124
6,0.0164,0.006235
7,0.0135,0.004777
8,0.0116,0.003702
9,0.0098,0.002936
10,0.0084,0.002543


SafetensorError: Error while serializing: IoError(Os { code: 1224, kind: Uncategorized, message: "The requested operation cannot be performed on a file with a user-mapped section open." })

In [166]:
output_dir =  os.path.join(os.getcwd(),r"tf_base_finetuned")
model.save_pretrained(output_dir, safe_serialization=False)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")

Model saved to D:\Byom (Don't Delete)\LRNLP-updated-clean-21-04\tf_base_finetuned


### Evaluation

In [152]:
data_path_test = os.path.join(os.getcwd(),r"data\test_data.json")
test_data = load_preprocess_test(data_path_test)

In [17]:
def evaluate_test_data(test_data,tokenizer):
    bleu = evaluate.load("sacrebleu")
    sources,predictions, references = [],[],[]
    for sample in tqdm(test_data, desc="Evaluating Translations"):
        input_text = sample["input"]
        sources.append(input_text[26:])
        expected_output = sample["output"]
        inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        inputs = {k: v.to(model.device) for k, v in inputs.items()} 
        outputs = model.generate(**inputs)
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predictions.append(decoded_output)
        references.append([expected_output])  
    bleu_score = bleu.compute(predictions=predictions, references=references)
    print(f"BLEU Score for Entity-Aware MT: {bleu_score['score']:.2f}")
    return bleu_score,sources,predictions,references

In [155]:
score,sources,predictions,references = evaluate_test_data(test_data,tokenizer)

Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [15:40<00:00,  1.77it/s]

BLEU Score for Entity-Aware MT: 42.28





In [160]:
# data_path_test = os.path.join(os.getcwd(),r"data\train_data.json")
# test_data = load_preprocess_test(data_path_test)

In [163]:
# predictions[10],references[10],sources[10]

('De quelle civilisation le dieu Râ était-il le dieu du soleil ?',
 ['De quelle civilisation le dieu Râ était-il le dieu du soleil ?'],
 'The God Ra, a Sun God, is from which civilization?')

### Loading the saved model and evaluating

In [167]:
model_path = os.path.join(os.getcwd(),"tf_base_finetuned" )
saved_tokenizer = T5Tokenizer.from_pretrained(model_path)
saved_model = T5ForConditionalGeneration.from_pretrained(model_path)

In [None]:
_,sources,predictions,references = evaluate_test_data(test_data[:10],saved_tokenizer)

In [None]:
comet_data = [{"src": src, "mt": mt} for src, mt in zip(sources, predictions)]
scores = comet_model.predict(comet_data, batch_size=8, gpus=1)
print("System-level COMET score:", scores["system_score"])
print("📊 Segment-level scores:", scores["scores"])

### Training without CustomLoss

In [190]:
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=30,
    weight_decay=0.01,
    save_total_limit=2,
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.689,0.058418
2,0.0751,0.036339
3,0.0533,0.025072
4,0.0414,0.017917
5,0.0321,0.012929
6,0.0266,0.009879
7,0.0212,0.007241
8,0.0182,0.005521
9,0.0156,0.004874
10,0.0131,0.003487


TrainOutput(global_step=29040, training_loss=0.026444886944618478, metrics={'train_runtime': 14799.9572, 'train_samples_per_second': 15.693, 'train_steps_per_second': 1.962, 'total_flos': 3.976043566989312e+16, 'train_loss': 0.026444886944618478, 'epoch': 30.0})

In [191]:
_,sources,predictions,references = evaluate_test_data(test_data,tokenizer)

Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [15:47<00:00,  1.75it/s]


BLEU Score for Entity-Aware MT: 42.29


### Training for only EA-MT task

In [170]:
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

In [171]:
model = model.to(device)

In [172]:
data_path = os.path.join(os.getcwd(),r"data\train_data.json")
train_data = load_preprocess_train(data_path,only_eamt=True)

In [173]:
tokenized_dataset = train_data.map(preprocess_function, batched=True)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

Map:   0%|          | 0/3871 [00:00<?, ? examples/s]

In [174]:
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=30,
    weight_decay=0.01,
    save_total_limit=2,
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,0.10955
2,0.606200,0.073314
3,0.136100,0.050919
4,0.102500,0.035841
5,0.078500,0.026873
6,0.064100,0.020288
7,0.051600,0.014458
8,0.042000,0.011597
9,0.035500,0.009079
10,0.029800,0.007692


TrainOutput(global_step=14520, training_loss=0.0467270033972815, metrics={'train_runtime': 7625.3254, 'train_samples_per_second': 15.23, 'train_steps_per_second': 1.904, 'total_flos': 1.988021783494656e+16, 'train_loss': 0.0467270033972815, 'epoch': 30.0})

In [175]:
output_dir = os.path.join(os.getcwd(),"tf_only_eamt")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")

Model saved to D:\Byom (Don't Delete)\LRNLP-updated-clean-21-04\tf_only_eamt


In [176]:
data_path_test = os.path.join(os.getcwd(),r"data\test_data.json")
test_data = load_preprocess_test(data_path_test)

In [195]:
_,sources,predictions,references = evaluate_test_data(test_data,tokenizer)

Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [16:49<00:00,  1.65it/s]


BLEU Score for Entity-Aware MT: 33.76


#### Testing different models

In [1]:
import json
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset, Dataset
import os
import torch.nn.functional as F
import torch
from tqdm import tqdm
import evaluate

In [2]:
def load_preprocess_train(data_path,only_eamt=False):
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    formatted_data = []
    for sample in data:
        source_text = sample["source"]
        target_text = sample["target"]
        entities = sample.get("enriched_entities", [])

        entity_annotations = [f"{ent['entity_name']['en']}" for ent in entities]
        entity_text = ", ".join(entity_annotations) if entity_annotations else "None"

        if only_eamt != True:
            formatted_data.append({
                    "task": "NER",
                    "input": f"Recognize entities: {source_text}",
                    "output": entity_text
                })
        formatted_data.append({
            "task": "Entity-aware MT",
            "input": f"Entity translate (EN→FR): {source_text}",
            "output": target_text
        })
    return Dataset.from_list(formatted_data)
def load_preprocess_test(data_path):
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    formatted_data = []
    for sample in data:
        source_text = sample["source"]
        target_text = sample["target"]
        formatted_data.append({
            "task": "Entity-aware MT",
            "input": f"Entity translate (EN→FR): {source_text}",
            "output": target_text
        })
    return formatted_data

In [3]:
def trainer_tester(model_src,output_dir,epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = T5Tokenizer.from_pretrained(model_src)
    model = T5ForConditionalGeneration.from_pretrained(model_src)
    model = model.to(device)
    data_path = os.path.join(os.getcwd(),r"data\train_data.json")
    train_data = load_preprocess_train(data_path)
    def preprocess_function(samples):
        inputs = tokenizer(samples["input"], padding="max_length", truncation=True, max_length=128)
        targets = tokenizer(samples["output"], padding="max_length", truncation=True, max_length=128)
        inputs["labels"] = targets["input_ids"]
        inputs = {key: torch.tensor(value).to(device) for key, value in inputs.items()}
        return inputs    
    tokenized_dataset = train_data.map(preprocess_function, batched=True)
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
    output_dir = os.path.join(os.getcwd(),output_dir)
    training_args = TrainingArguments(
        output_dir=output_dir,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-4,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=epochs,
        weight_decay=0.01,
        save_total_limit=2,
        push_to_hub=False,
    )
    
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        eval_dataset=tokenized_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )
    
    trainer.train()
    data_path_test = os.path.join(os.getcwd(),r"data\test_data.json")
    test_data = load_preprocess_test(data_path_test)
    def evaluate_test_data(test_data,tokenizer):
        bleu = evaluate.load("sacrebleu")
        sources,predictions, references = [],[],[]
        for sample in tqdm(test_data, desc="Evaluating Translations"):
            input_text = sample["input"]
            sources.append(input_text[26:])
            expected_output = sample["output"]
            inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
            inputs = {k: v.to(model.device) for k, v in inputs.items()} 
            outputs = model.generate(**inputs)
            decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
            predictions.append(decoded_output)
            references.append([expected_output])  
        bleu_score = bleu.compute(predictions=predictions, references=references)
        print(f"BLEU Score for Entity-Aware MT: {bleu_score['score']:.2f}")
        return bleu_score,sources,predictions,references
    bleu_score,sources,predictions,references = evaluate_test_data(test_data,tokenizer)
    return bleu_score,sources,predictions,references

In [21]:
model2 = "google/t5-v1_1-small"
bleu_score,sources,predictions,references = trainer_tester(model2,"t5-small",10)

Map:   0%|          | 0/7742 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,3.2159,0.214477
2,0.2623,0.146666
3,0.1967,0.116541
4,0.1673,0.099362
5,0.1443,0.087831
6,0.1331,0.07908
7,0.1215,0.072148
8,0.1148,0.068098
9,0.1077,0.064946
10,0.1027,0.064129


Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [10:16<00:00,  2.69it/s]


BLEU Score for Entity-Aware MT: 33.09


In [22]:
del model2
torch.cuda.empty_cache()

In [23]:
model3 = "google/t5-v1_1-base"
bleu_score,sources,predictions,references = trainer_tester(model3,"t5-base",10)

tokenizer_config.json:   0%|          | 0.00/1.86k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/7742 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,6.987,0.305785
2,0.3282,0.240272
3,0.2566,0.119371
4,0.1479,0.077986
5,0.1061,0.058366
6,0.0873,0.044545
7,0.0733,0.036549
8,0.0631,0.030705
9,0.0553,0.027256
10,0.0497,0.025757


model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [15:49<00:00,  1.75it/s]


BLEU Score for Entity-Aware MT: 36.91


In [24]:
del model3
torch.cuda.empty_cache()

In [25]:
model4 = "google/mt5-small"
bleu_score,sources,predictions,references = trainer_tester(model4,"mt5-small",10)

tokenizer_config.json:   0%|          | 0.00/82.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/553 [00:00<?, ?B/s]

You are using a model of type mt5 to instantiate a model of type t5. This is not supported for all configurations of models and can yield errors.


pytorch_model.bin:   0%|          | 0.00/1.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/7742 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.1227,0.715235
2,0.5983,0.263588
3,0.2721,0.143983
4,0.1829,0.105486
5,0.1452,0.08758
6,0.1272,0.076745
7,0.1137,0.067438
8,0.1043,0.062088
9,0.0961,0.059473
10,0.0908,0.057787


model.safetensors:   0%|          | 0.00/1.20G [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [12:51<00:00,  2.15it/s]


BLEU Score for Entity-Aware MT: 33.50


In [26]:
del model4
torch.cuda.empty_cache()

In [27]:
model5 = "google/mt5-base"
bleu_score,sources,predictions,references = trainer_tester(model5,"mt5-base",5)

tokenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/702 [00:00<?, ?B/s]

You are using a model of type mt5 to instantiate a model of type t5. This is not supported for all configurations of models and can yield errors.


pytorch_model.bin:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/7742 [00:00<?, ? examples/s]

  trainer = Trainer(


model.safetensors:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss
1,2.1215,0.109755
2,0.1238,0.055694
3,0.0788,0.03969
4,0.0607,0.030259
5,0.0487,0.027131


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [16:08<00:00,  1.71it/s]


BLEU Score for Entity-Aware MT: 41.21


In [34]:
del model5
torch.cuda.empty_cache()

NameError: name 'model5' is not defined

In [7]:
model1 = "google/flan-t5-large"
bleu_score,sources,predictions,references = trainer_tester(model1,"flan-t5-large",2)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/7742 [00:00<?, ? examples/s]

  trainer = Trainer(
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,0.084,0.039539
2,0.0488,0.021369


Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [28:57<00:00,  1.05s/it]


BLEU Score for Entity-Aware MT: 43.15


In [8]:
del model1
torch.cuda.empty_cache()

In [4]:
model1 = "google/flan-t5-large"
bleu_score,sources,predictions,references = trainer_tester(model1,"flan-t5-large",10)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/7742 [00:00<?, ? examples/s]

  trainer = Trainer(
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,0.0867,0.044335
2,0.0598,0.025214
3,0.0366,0.015147
4,0.0247,0.009681
5,0.0183,0.006549
6,0.0118,0.004213
7,0.0078,0.002476
8,0.0061,0.001441
9,0.0039,0.000793
10,0.0023,0.000479


Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [28:07<00:00,  1.02s/it]


BLEU Score for Entity-Aware MT: 43.13


In [5]:
del model1
torch.cuda.empty_cache()