In [4]:
import torch
from transformers import AutoTokenizer
import evaluate
from safetensors.torch import load_file

from model import Transformer, TransformerConfig
from tokenizer import FrenchTokenizer

# Relative path (resolved against the current working directory) to the
# safetensors file holding the trained model weights.
path_to_model_safetensor = (
    "work_dir/Seq2Seq_Neural_Machine_Translation/checkpoint_150000/model.safetensors"
)

## Load Model and Tokenizers

In [6]:
### Build the Model ###
# Instantiate the Transformer from a default-constructed config.
config = TransformerConfig()
model = Transformer(config)

### Restore the Trained Weights ###
# Read the checkpoint tensors from the safetensors file and copy them into the model.
weight_dict = load_file(path_to_model_safetensor)
model.load_state_dict(weight_dict)

# Put the model in evaluation mode before running any predictions.
model.eval()

### Build the Tokenizers ###
# Target side: the project's FrenchTokenizer, built from the JSON file path below.
tgt_tokenizer = FrenchTokenizer("trained_tokenizer/french_wp.json")
# Source side: the pretrained "google-bert/bert-base-uncased" tokenizer from Hugging Face.
src_tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

### Let's Try to Translate Something!

In [7]:
### Translate One English Sentence ###
sample_sentence = "I want to learn something new tonight!"

# Tokenize the sentence and keep at most `config.max_src_len` token ids.
sample_encoding = src_tokenizer(sample_sentence)
sample_token_ids = sample_encoding["input_ids"][:config.max_src_len]

# Turn the ids into a tensor and prepend a dimension of size 1 for model.inference.
src_ids = torch.tensor(sample_token_ids).unsqueeze(0)

# Look up the ids of the target tokenizer's "[BOS]" / "[EOS]" tokens once and
# pass them to model.inference as its start and end ids.
tgt_special_ids = tgt_tokenizer.special_tokens_dict
translated = model.inference(
    src_ids,
    tgt_start_id=tgt_special_ids["[BOS]"],
    tgt_end_id=tgt_special_ids["[EOS]"],
)

# Decode the model's output into text, with special tokens skipped.
prediction = tgt_tokenizer.decode(translated, skip_special_tokens=True)

print("French Translation:", prediction)

French Translation: je veux apprendre quelque chose de nouveau ce soir!


I don't know a word of French, but we can paste our French prediction into Google Translate and see what we get back in English:

<img src="src/google_translate.png"  width="500"/>

Looks like it worked! Not too bad for our model at all!