In [1]:
import json
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset, Dataset

In [2]:
def preprocess_data(data, output_path):
    formatted_data = []
    for sample in data:
        source_text = sample["source"]
        target_text = sample["target"]
        entities = sample.get("enriched_entities", [])

        entity_annotations = [f"{ent['entity_name']['en']} [{ent['entity_type']}]" for ent in entities]
        entity_text = ", ".join(entity_annotations) if entity_annotations else "None"

        # Reduce NER examples to avoid overfitting
        if len(formatted_data) % 3 == 0:  # Keep only 1/3 NER examples
            formatted_data.append({
                "task": "NER",
                "input": f"Recognize entities: {source_text}",
                "output": entity_text
            })

        # Keep more translation examples
        formatted_data.append({
            "task": "Entity-aware MT",
            "input": f"Entity translate (EN→FR): {source_text}",
            "output": target_text
        })

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(formatted_data, f, indent=4, ensure_ascii=False)

    print(f"Processed data saved to {output_path}")


In [17]:
import json

def read_jsonl(file_path):
    """Reads a JSONL file and yields dictionaries one by one (memory efficient)."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line_number, line in enumerate(f, start=1):
                try:
                    yield json.loads(line)
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON on line {line_number}: {e}")
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")


In [18]:
import os
print(os.getcwd())

D:\Byom (Don't Delete)\main\EA-MT\notebooks


In [19]:
train_path = r"D:\Byom (Don't Delete)\main\EA-MT\data\train\train_data.json"

In [20]:
print(os.path.exists(train_path))

True


In [21]:
data_train = read_jsonl(train_path)

In [24]:
data_train

<generator object read_jsonl at 0x000001E4FCFC0F20>

In [25]:
preprocess_data(data_train, "train_processed_data.json")

Error decoding JSON on line 1: Expecting value: line 2 column 1 (char 2)
Error decoding JSON on line 2: Expecting property name enclosed in double quotes: line 2 column 1 (char 6)
Error decoding JSON on line 3: Extra data: line 1 column 13 (char 12)
Error decoding JSON on line 4: Extra data: line 1 column 24 (char 23)
Error decoding JSON on line 5: Extra data: line 1 column 24 (char 23)
Error decoding JSON on line 6: Extra data: line 1 column 17 (char 16)
Error decoding JSON on line 7: Extra data: line 1 column 17 (char 16)
Error decoding JSON on line 8: Extra data: line 1 column 19 (char 18)


TypeError: string indices must be integers, not 'str'

In [9]:
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [11]:
def preprocess_function(samples):
    # Tokenize inputs and targets
    inputs = tokenizer(samples["input"], padding="max_length", truncation=True, max_length=128)
    targets = tokenizer(samples["output"], padding="max_length", truncation=True, max_length=128)

    # Set the labels for the inputs
    inputs["labels"] = targets["input_ids"]

    # Move tensors to CUDA (GPU) or CPU
    inputs = {key: torch.tensor(value).to(device) for key, value in inputs.items()}
    
    return inputs


In [12]:
def load_and_prepare_data(input_path):
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    return Dataset.from_list(data)

data_path = "train_processed_data.json"
dataset = load_and_prepare_data(data_path)

In [13]:
tokenized_dataset = dataset.map(preprocess_function, batched=True)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

Map:   0%|          | 0/11613 [00:00<?, ? examples/s]

In [16]:
output_dir = "t5_finetuned"

training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    weight_decay=0.01,
    save_total_limit=2,
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.0957,0.077598


Model saved to t5_finetuned


In [24]:
# !pip install evaluate

In [25]:
# !pip install sacrebleu

In [17]:
data_test[400]

{'id': '307ce37f',
 'source_locale': 'en',
 'target_locale': 'fr',
 'source': 'Which country is bigger in size, USA or China?',
 'target': 'Entre les USA et la Chine : quel pays a la plus grande superficie ?',
 'entities': ['Q148'],
 'from': 'mintaka',
 'enriched_entities': [{'qid': 'Q148',
   'entity_name': {'en': "People's Republic of China",
    'fr': 'république populaire de Chine'},
   'entity_type': 'sovereign state'}]}

In [19]:
import torch  

# Move model to CUDA  
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  
model.to(device)  

# Prepare input  
sample_input = data_test[400]["source"]  
print("Sample Input:", sample_input)  

inputs = tokenizer(sample_input, return_tensors="pt", padding=True, truncation=True).to(device)  

# Generate output  
outputs = model.generate(**inputs)  

# Decode and print output  
print("Sample Output:", tokenizer.decode(outputs[0], skip_special_tokens=True))  


Sample Input: Which country is bigger in size, USA or China?
Sample Output: People's Republic of China [sovereign state], United States [sovereign


In [None]:
preprocess_data(data_test,"test_split.json")

In [30]:
import json
import torch
import evaluate
from transformers import T5Tokenizer, T5ForConditionalGeneration
from tqdm import tqdm

# Load model and tokenizer
model_path = "t5_finetuned"
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

def load_test_data(input_path):
    with open(input_path, "r", encoding="utf-8") as f:
        return json.load(f)

# # Load test set (limit to 10 samples for debugging)
test_data_path = "test_split.json"
test_data = load_test_data(test_data_path)[:50]

# Quick test to check model inference
sample_input = test_data[0]["input"]
print("Sample Input:", sample_input)
inputs = tokenizer(sample_input, return_tensors="pt", padding=True, truncation=True)
outputs = model.generate(**inputs)
print("Sample Output:", tokenizer.decode(outputs[0], skip_special_tokens=True))

bleu = evaluate.load("sacrebleu")

def restore_entities(text, entity_mapping):
    """Replace placeholders in translated text with original entity names."""
    for placeholder, original in entity_mapping.items():
        text = text.replace(placeholder, original)
    return text

predictions, references = [], []

for sample in tqdm(test_data, desc="Evaluating Translations"):
    input_text = f"Entity translate (EN→FR): {sample['input']}"
    expected_output = sample["output"]
    entity_mapping = sample.get("enriched_entities", {})

    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(**inputs)
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Restore entities before BLEU evaluation
    final_output = restore_entities(decoded_output, entity_mapping)
    final_reference = restore_entities(expected_output, entity_mapping)

    predictions.append(final_output)
    references.append([final_reference])

# Compute BLEU score on restored translations
bleu_score = bleu.compute(predictions=predictions, references=references)
print(f"Updated BLEU Score for Translation: {bleu_score['score']:.2f}")


Sample Input: Recognize entities: How may states touch Lake Michigan?
Sample Output: Lake Michigan [lake]


Evaluating Translations: 100%|█████████████████████████████████████████████████████████| 50/50 [00:23<00:00,  2.09it/s]

Updated BLEU Score for Translation: 31.44





In [36]:
test_data[1]

{'task': 'Entity-aware MT',
 'input': 'Entity translate (EN→FR): How may states touch Lake Michigan?',
 'output': 'Combien d’États touchent le lac Michigan ?'}

In [32]:
for i in range(len(predictions)):
    print (i,references[i],"\t",predictions[i])

0 ['Lake Michigan [lake]'] 	 Combien de États a-t-il touché le lac Michigan ?
1 ['Combien d’États touchent le lac Michigan ?'] 	 Combien d’États touchent le lac Michigan ?
2 ['Combien d’États touchent le lac Michigan ?'] 	 Combien d’États touchent-on le lac Michigan ?
3 ["People's Republic of China [sovereign state]"] 	 Quel pays a le plus peuplé, le Canada ou le Chine ?
4 ['Quel est le pays le plus peuplé, le Canada ou la Chine ?'] 	 Quel pays est le plus peuplé, le Canada ou le Chine ?
5 ['Quel est le pays le plus peuplé, le Canada ou la Chine ?'] 	 Quel pays est le plus peuplé, le Canada ou le Chine ?
6 ['The Lord of the Rings [novel series]'] 	 Qui a réalisé le film qui est inspiré du deuxième livre de la série Le Seigneur
7 ['Qui a réalisé le film inspiré du deuxième livre de la série Le Seigneur des anneaux ?'] 	 Qui a réalisé le film inspiré du deuxième livre de la série Le Seigneur des
8 ['Qui a réalisé le film inspiré du deuxième livre de la série Le Seigneur des anneaux ?'] 	

In [24]:
from transformers import Trainer
import torch.nn.functional as F
import torch

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):  # ✅ Fix: Accept extra arguments
        """Custom loss function to prioritize translation over NER."""
        labels = inputs.pop("labels")  # Extract target labels
        outputs = model(**inputs)
        logits = outputs.logits  # Get logits

        # Compute CrossEntropy loss
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1), ignore_index=-100)

        # Assign higher weight to translation (80%) and lower weight to NER (20%)
        ner_weight = 0.2
        translation_weight = 0.8

        # Get task type (default to Translation)
        task_type = inputs.get("task_type", ["Translation"] * logits.shape[0])

        # Convert task type to weight tensor
        task_weights = torch.tensor(
            [ner_weight if "NER" in task else translation_weight for task in task_type],
            device=logits.device,
            dtype=torch.float,
        )

        # Scale loss by task weights
        weighted_loss = loss * task_weights.mean()

        return (weighted_loss, outputs) if return_outputs else weighted_loss


In [25]:
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()


  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss
1,0.0442,0.053552


TrainOutput(global_step=1452, training_loss=0.04828366563339864, metrics={'train_runtime': 281.9405, 'train_samples_per_second': 41.19, 'train_steps_per_second': 5.15, 'total_flos': 539686607781888.0, 'train_loss': 0.04828366563339864, 'epoch': 1.0})

In [28]:
def load_test_data(input_path):
    with open(input_path, "r", encoding="utf-8") as f:
        return json.load(f)
test_data_path = "test_split.json"
test_data = load_test_data(test_data_path)[:50]

In [38]:
# Load a sample from test data
sample_input = test_data[1]["input"]
print("Sample Input:", sample_input)
print("Expected Input:",test_data[1]["output"])
# Tokenize input
inputs = tokenizer(sample_input, return_tensors="pt", padding=True, truncation=True)

# Move to GPU if available
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Generate output
outputs = model.generate(**inputs)

# Decode the output
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Sample Output:", decoded_output)



Sample Input: Entity translate (EN→FR): How may states touch Lake Michigan?
Expected Input: Combien d’États touchent le lac Michigan ?
Sample Output: Combien d’États touchent le lac Michigan ?


### Using only for ea-mt tasks

In [43]:
import json
import torch
import evaluate
from transformers import T5Tokenizer, T5ForConditionalGeneration
from tqdm import tqdm

# Load model and tokenizer
model_path = "t5_finetuned"
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

def load_test_data(input_path):
    """Load test data and filter only 'Entity-aware MT' tasks."""
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # ✅ Keep only samples where task == "Entity-aware MT"
    filtered_data = [sample for sample in data if sample.get("task") == "Entity-aware MT"]
    return filtered_data

# Load filtered test data
test_data_path = "test_split.json"
test_data = load_test_data(test_data_path) # Limit to first 50 samples for quick testing

# ✅ Quick test on one sample
sample_input = test_data[0]["input"]
print("Sample Input:", sample_input)

inputs = tokenizer(sample_input, return_tensors="pt", padding=True, truncation=True)
inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Move to GPU if available

outputs = model.generate(**inputs)
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Sample Output:", decoded_output)

# ✅ Function to restore entities in translated text
def restore_entities(text, entity_mapping):
    """Replace placeholders in translated text with original entity names."""
    for placeholder, original in entity_mapping.items():
        text = text.replace(placeholder, original)
    return text

# Initialize BLEU evaluation
bleu = evaluate.load("sacrebleu")

predictions, references = [], []

# ✅ Process all test samples
for sample in tqdm(test_data, desc="Evaluating Translations"):
    input_text = sample["input"]
    expected_output = sample["output"]
    entity_mapping = sample.get("enriched_entities", {})

    # Tokenize and generate translation
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Move to GPU

    outputs = model.generate(**inputs)
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # ✅ Restore entity names before BLEU evaluation
    final_output = restore_entities(decoded_output, entity_mapping)
    final_reference = restore_entities(expected_output, entity_mapping)

    predictions.append(final_output)
    references.append([final_reference])  # BLEU expects a list of references

# ✅ Compute BLEU score on restored translations
bleu_score = bleu.compute(predictions=predictions, references=references)
print(f"Updated BLEU Score for Entity-Aware MT: {bleu_score['score']:.2f}")


Sample Input: Entity translate (EN→FR): How may states touch Lake Michigan?
Sample Output: Combien d’États touchent le lac Michigan ?


Evaluating Translations: 100%|███████████████████████████████████████████████████| 1660/1660 [1:41:19<00:00,  3.66s/it]


Updated BLEU Score for Entity-Aware MT: 34.96


### Trying with different models

In [44]:
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [45]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [47]:
output_dir = "t5_large_finetuned"

training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.0626,0.041301
2,0.0555,0.020989
3,0.036,0.014206


Model saved to t5_large_finetuned


In [48]:
predictions, references = [], []

# ✅ Process all test samples
for sample in tqdm(test_data, desc="Evaluating Translations"):
    input_text = sample["input"]
    expected_output = sample["output"]
    entity_mapping = sample.get("enriched_entities", {})

    # Tokenize and generate translation
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Move to GPU

    outputs = model.generate(**inputs)
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # ✅ Restore entity names before BLEU evaluation
    final_output = restore_entities(decoded_output, entity_mapping)
    final_reference = restore_entities(expected_output, entity_mapping)

    predictions.append(final_output)
    references.append([final_reference])  # BLEU expects a list of references

# ✅ Compute BLEU score on restored translations
bleu_score = bleu.compute(predictions=predictions, references=references)
print(f"Updated BLEU Score for Entity-Aware MT: {bleu_score['score']:.2f}")

Evaluating Translations: 100%|█████████████████████████████████████████████████████| 1660/1660 [16:23<00:00,  1.69it/s]


Updated BLEU Score for Entity-Aware MT: 42.57


In [49]:
!pip install transformers sentencepiece



In [50]:
from transformers import MBartForConditionalGeneration, MBartTokenizer

model_name = "facebook/mbart-large-50"
tokenizer = MBartTokenizer.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)


tokenizer_config.json:   0%|          | 0.00/531 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'MBart50Tokenizer'. 
The class this function is called from is 'MBartTokenizer'.


pytorch_model.bin:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/261 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

In [54]:
test_data[:5]

[{'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): How may states touch Lake Michigan?',
  'output': 'Combien d’États touchent le lac Michigan ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Which country has a larger population, Canada or China?',
  'output': 'Quel est le pays le plus peuplé, le Canada ou la Chine ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Who directed the movie that is based on the second book of The Lord of the Rings series?',
  'output': 'Qui a réalisé le film inspiré du deuxième livre de la série Le Seigneur des anneaux ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Which country did not ratify the Treaty of Versailles?',
  'output': 'Quel pays n’a pas ratifié le traité de Versailles ?'},
 {'task': 'Entity-aware MT',
  'input': 'Entity translate (EN→FR): Who wrote Neuromancer?',
  'output': 'Qui a écrit le livre Neuromancien ?'}]

In [51]:
import json
import torch
import evaluate
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration
from tqdm import tqdm

# Load model and tokenizer
model_path = "t5_large_finetuned"
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

In [58]:
def preprocess_data(data):
    """Convert data to the required test format."""
    formatted_data = []
    for sample in data:
        source_text = sample["source"]
        target_text = sample["target"]
        
        formatted_data.append({
            "task": "Entity-aware MT",
            "input": f"Entity translate (EN→FR): {source_text}",
            "output": target_text
        })
    
    return formatted_data

def filter_test_data(test_data):
    """Filter only 'Entity-aware MT' tasks from the data."""
    return [sample for sample in test_data if sample.get("task") == "Entity-aware MT"]

def restore_entities(text, entity_mapping):
    """Replace placeholders in translated text with original entity names."""
    for placeholder, original in entity_mapping.items():
        text = text.replace(placeholder, original)
    return text

# Initialize BLEU evaluation
bleu = evaluate.load("sacrebleu")

def evaluate_translations(df_test):
    """Evaluate translations using a Pandas DataFrame as input."""
    test_data = preprocess_data(df_test.to_dict(orient='records'))
    filtered_data = filter_test_data(test_data)
    
    if not filtered_data:
        print("Error: No 'Entity-aware MT' samples found in the test data.")
        return None
    
    predictions, references = [], []
    
    for sample in tqdm(filtered_data, total=len(filtered_data), desc="Evaluating Translations"):
        input_text = sample["input"]
        expected_output = sample["output"]
        entity_mapping = sample.get("enriched_entities", {})

        # Tokenize and generate translation
        inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Move to GPU
        
        outputs = model.generate(**inputs)
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Restore entity names before BLEU evaluation
        final_output = restore_entities(decoded_output, entity_mapping)
        final_reference = restore_entities(expected_output, entity_mapping)

        predictions.append(final_output)
        references.append([final_reference])  # BLEU expects a list of references

    # Compute BLEU score
    if not predictions or not references:
        print("Error: No predictions or references available for BLEU evaluation.")
        return None

    bleu_score = bleu.compute(predictions=predictions, references=references)
    print(f"Updated BLEU Score for Entity-Aware MT: {bleu_score['score']:.2f}")

    return bleu_score

In [63]:
df_test = pd.read_csv("validation_french.csv")

In [65]:
df_test.head()

Unnamed: 0.1,Unnamed: 0,wikidata_id,source,target,entity
0,0,Q100231013,What is the scope of the Statistical Classific...,Quelle est la portée de la Nomenclature statis...,Statistical Classification of Economic Activit...
1,1,Q100231013,How is the Statistical Classification of Econo...,Comment la Nomenclature statistique des activi...,Statistical Classification of Economic Activit...
2,2,Q100231013,Why is the Statistical Classification of Econo...,Pourquoi la Nomenclature statistique des activ...,Statistical Classification of Economic Activit...
3,3,Q1040143,How does Pokémon evolution work?,Comment fonctionne l'évolution des Pokémon?,Pokémon evolution
4,4,Q1040143,What is Pokémon evolution?,Qu'est-ce que l'évolution des Pokémon?,Pokémon evolution


In [66]:
evaluate_translations(df_test)

Evaluating Translations: 100%|███████████████████████████████████████████████████████| 724/724 [11:25<00:00,  1.06it/s]

Updated BLEU Score for Entity-Aware MT: 17.87





{'score': 17.874177858127915,
 'counts': [3794, 1669, 865, 424],
 'totals': [7404, 6680, 5956, 5232],
 'precisions': [51.24257158292815,
  24.98502994011976,
  14.523169912693083,
  8.103975535168196],
 'bp': 0.9072113866136169,
 'sys_len': 7404,
 'ref_len': 8125}