References:
- [Using a model with custom code](https://huggingface.co/docs/transformers/v4.39.1/custom_models#using-a-model-with-custom-code)

In [4]:
from transformers import AutoModel

# `trust_remote_code=True` lets the repository's custom model code run locally,
# so the download is pinned to one exact commit rather than a moving branch.
commit_hash = "e857b0518ae6d8dd6d52f7f8575d85a39ee7f81f"
model = AutoModel.from_pretrained(
    "gretakate/english-to-italian-transformer-from-scratch",
    revision=commit_hash,
    trust_remote_code=True,
)

In [9]:
from tokenizers import Tokenizer

def _require_token_id(tokenizer, token):
    """Return the vocabulary id of ``token``, raising if it is unknown.

    ``Tokenizer.token_to_id`` returns ``None`` for a token that is not in the
    vocabulary; raising here surfaces a wrong/mismatched vocabulary file
    immediately instead of silently storing ``None`` in the model config.

    Raises:
        ValueError: if ``token`` is not present in the tokenizer's vocabulary.
    """
    token_id = tokenizer.token_to_id(token)
    if token_id is None:
        raise ValueError(f"Special token {token!r} is missing from the tokenizer vocabulary")
    return token_id


# Source- and target-side tokenizers, loaded from JSON files relative to the
# notebook's working directory.
tokenizer_src = Tokenizer.from_file('vocab/tokenizeren.json')
tokenizer_tgt = Tokenizer.from_file('vocab/tokenizerit.json')

# The model config takes its special-token ids from the *target* vocabulary.
model.config.decoder_start_token_id = _require_token_id(tokenizer_tgt, "[SOS]")
model.config.pad_token_id = _require_token_id(tokenizer_tgt, "[PAD]")
model.config.eos_token_id = _require_token_id(tokenizer_tgt, "[EOS]")

In [10]:
import torch

sentence = "Translate this sentence to italian for me."

# Look the special-token ids up once instead of on every use.
sos_id = tokenizer_src.token_to_id('[SOS]')
eos_id = tokenizer_src.token_to_id('[EOS]')
pad_id = tokenizer_src.token_to_id('[PAD]')

# Keep the tokenizer's Encoding under its own name so that `source` only ever
# refers to the padded id tensor.
source_encoding = tokenizer_src.encode(sentence)

# Two positions are reserved for [SOS] and [EOS]; whatever is left up to
# `seq_len` is filled with [PAD].
num_padding = model.config.seq_len - len(source_encoding.ids) - 2
if num_padding < 0:
    # A negative count would make `[pad_id] * num_padding` an empty list and
    # silently yield a sequence longer than `seq_len`, so fail loudly instead.
    raise ValueError(
        f"Sentence encodes to {len(source_encoding.ids)} tokens, but at most "
        f"{model.config.seq_len - 2} fit in seq_len={model.config.seq_len}"
    )

# 1-D tensor of exactly `seq_len` ids: [SOS] tokens... [EOS] [PAD]...
source = torch.cat([
    torch.tensor([sos_id], dtype=torch.int64),
    torch.tensor(source_encoding.ids, dtype=torch.int64),
    torch.tensor([eos_id], dtype=torch.int64),
    torch.tensor([pad_id] * num_padding, dtype=torch.int64),
], dim=0)

# Mask out all of the padding tokens (1 = real token, 0 = padding); the two
# unsqueeze calls give it shape (1, 1, seq_len).
source_mask = (source != pad_id).unsqueeze(0).unsqueeze(0).int()

# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    results = model(input_ids=source, attention_mask=source_mask)

In [11]:
# Convert the first entry of the model output to a plain Python list and turn
# it back into text with the target tokenizer.
# NOTE(review): despite the name, `logits[0]` is fed straight to `decode`, so it
# presumably already holds token ids -- confirm against the model's custom code.
predicted_ids = results.logits[0].tolist()
tokenizer_tgt.decode(predicted_ids)

'La terminò di spiegare le lezioni di lezione mi spense di nuovo .'