In [1]:
import torch
import transformers

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (To force CPU execution, assign torch.device("cpu") here instead.)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
from transformers import MarianMTModel, MarianTokenizer
# Name of the pretrained English -> Romance-languages checkpoint.
en_ROMANCE_model_name = 'Helsinki-NLP/opus-mt-en-ROMANCE'
# Load the tokenizer that belongs to that checkpoint.
en_ROMANCE_tokenizer = MarianTokenizer.from_pretrained(en_ROMANCE_model_name)
# Display the target-language codes as one string; a code such as ">>es<<" is
# placed at the start of the English text to pick the output language.
', '.join(en_ROMANCE_tokenizer.supported_language_codes)

'>>fr<<, >>es<<, >>it<<, >>pt<<, >>pt_br<<, >>ro<<, >>ca<<, >>gl<<, >>pt_BR<<, >>la<<, >>wa<<, >>fur<<, >>oc<<, >>fr_CA<<, >>sc<<, >>es_ES<<, >>es_MX<<, >>es_AR<<, >>es_PR<<, >>es_UY<<, >>es_CL<<, >>es_CO<<, >>es_CR<<, >>es_GT<<, >>es_HN<<, >>es_NI<<, >>es_PA<<, >>es_PE<<, >>es_VE<<, >>es_DO<<, >>es_EC<<, >>es_SV<<, >>an<<, >>pt_PT<<, >>frp<<, >>lad<<, >>vec<<, >>fr_FR<<, >>co<<, >>it_IT<<, >>lld<<, >>lij<<, >>lmo<<, >>nap<<, >>rm<<, >>scn<<, >>mwl<<'

In [3]:
# Load the English -> Romance translation model and move it to `device`.
en_ROMANCE = MarianMTModel.from_pretrained(en_ROMANCE_model_name).to(device)

In [4]:
# Name of the pretrained checkpoint for the reverse direction (Romance -> English).
ROMANCE_en_model_name = 'Helsinki-NLP/opus-mt-ROMANCE-en'
# Tokenizer paired with the Romance -> English checkpoint.
ROMANCE_en_tokenizer = MarianTokenizer.from_pretrained(ROMANCE_en_model_name)

In [5]:
# Load the Romance -> English translation model and move it to `device`.
ROMANCE_en = MarianMTModel.from_pretrained(ROMANCE_en_model_name).to(device)

# Batch translation

In [6]:
def translate(tokenizer, model, text, num_outputs):
    """Translate ``text`` and return the ``num_outputs`` best beam-search candidates.

    Args:
        tokenizer: Tokenizer providing ``prepare_translation_batch`` and ``decode``.
        model: Translation model providing ``generate`` and a ``device`` attribute.
        text: Source sentence to translate.
        num_outputs: Number of candidate translations to return. The same
            value is used as the beam width.

    Returns:
        A list of decoded strings, one per sequence returned by
        ``model.generate``. Special tokens are skipped and tokenization
        spaces are left as produced.
    """
    # Tokenize a single-sentence batch and place it on the model's device.
    encoded = tokenizer.prepare_translation_batch([text]).to(model.device)

    # Beam width equals the number of requested candidates.
    generated = model.generate(
        **encoded,
        num_beams=num_outputs,
        num_return_sequences=num_outputs,
    )

    candidates = []
    for sequence in generated:
        candidates.append(
            tokenizer.decode(
                sequence,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )
        )
    return candidates

# Example: request 5 candidate translations; the ">>es<<" prefix selects Spanish.
translate(en_ROMANCE_tokenizer, en_ROMANCE, ">>es<< I ran to the store.", 5)
    

['Corrí a la tienda.',
 'Yo corrí a la tienda.',
 'Corri a la tienda.',
 'Corre a la tienda.',
 'Corrí hasta la tienda.']

# Incremental translation

English to token ids

In [7]:
# Short aliases for the step-by-step cells that follow; this walkthrough uses
# the English -> Romance tokenizer/model pair.
tokenizer = en_ROMANCE_tokenizer
model = en_ROMANCE

In [8]:
# Source sentence; the leading ">>es<<" code requests Spanish output.
english = ">>es<< I ran to the store."
# Encode the sentence as a PyTorch tensor of token ids and move it to `device`.
# NOTE(review): `input_ids` is not read by any of the cells visible here (they
# use `batch` instead) — confirm whether it is still needed.
input_ids = tokenizer.encode(english, return_tensors="pt").to(device)

English token ids to "concept space"

In [9]:
# Tokenize the source sentence into a single-item batch (it carries the
# attention mask the decoding cells use later) and move it to `device`,
# where the model already lives.
batch = tokenizer.prepare_translation_batch([english]).to(device)

# Run only the encoder half of the model. This is pure inference, so disable
# autograd: otherwise the encoder output would carry a gradient graph that is
# kept alive for the rest of the walkthrough via `past`. The decoder forward
# passes further down are likewise run under torch.no_grad().
with torch.no_grad():
    english_encoded = model.get_encoder()(**batch)

What we have so far, as token ids:

In [45]:
# Token id used to start decoding, read from the model config.
decoder_start_token = model.config.decoder_start_token_id
# Bare expression so the notebook displays the value.
decoder_start_token

65000

In [46]:
# Seed the partial translation with just the decoder start token, as a
# [1, 1] tensor of int64 ids created directly on `device`.
partial_decode = torch.tensor([[decoder_start_token]], dtype=torch.long, device=device)

Ask the model for what comes next

In [47]:
# one-time setup: run this once, before the first decoding step.
# `past` starts as the encoder output paired with None; every decoding step
# below overwrites it with model_outputs[1].
# NOTE(review): the None slot is presumably the not-yet-populated decoder
# cache — confirm against the installed transformers version.
past = (english_encoded, None)

In [47]:
# Build the forward-pass keyword arguments from the tokens decoded so far,
# the state carried in `past`, and the attention mask of the source batch.
model_inputs = model.prepare_inputs_for_generation(
    partial_decode, past=past, attention_mask=batch['attention_mask'], use_cache=model.config.use_cache
)
# Inference only, so skip gradient tracking.
with torch.no_grad():
    model_outputs = model(**model_inputs)

# Keep only the logits at the last position along dimension 1.
next_token_logits = model_outputs[0][:, -1, :]
# Carry the returned state into the next step. This overwrites `past`, so the
# cell is not idempotent: a re-run feeds the updated `past` back in.
past = model_outputs[1]

Find the most likely next token

In [48]:
# Greedy choice: index of the largest logit for the first (and only) batch item.
next_token_to_add = next_token_logits[0].argmax()

Notice that this is a zero-dimensional (scalar) tensor, so it can't be concatenated with anything as-is:

In [49]:
# Display the shape of the chosen token id.
next_token_to_add.shape

torch.Size([])

But this is the kind of shape that it needs to have:

In [50]:
# Display the shape of the running sequence of decoded ids, for comparison.
partial_decode.shape

torch.Size([1, 1])

So here's how to give it that extra dimension:

In [51]:
# Each unsqueeze(0) adds one leading dimension: [] -> [1] -> [1, 1].
# The result is only displayed here, not assigned.
next_token_to_add.unsqueeze(0).unsqueeze(0)

tensor([[10509]], device='cuda:0')

Ok, those shapes align, so we can concatenate them.

In [52]:
# Append the chosen token (reshaped to [1, 1]) along the last dimension.
# Re-running this cell appends the same token again.
partial_decode = torch.cat(
    [partial_decode, next_token_to_add.unsqueeze(0).unsqueeze(0)],
    dim=-1,
)

In [53]:
# Display the extended sequence of token ids.
partial_decode

tensor([[65000, 10509]], device='cuda:0')

Now we have a new output, with one additional token:

In [54]:
# Map the ids of the first (and only) sequence back to their token strings.
tokenizer.convert_ids_to_tokens(partial_decode[0])

['<pad>', '▁Corr']

We can now do all that again, to ask for the next thing after that token.

In [55]:
# Same decoding step as before, now with the two-token `partial_decode` and
# the `past` returned by the previous forward pass.
model_inputs = model.prepare_inputs_for_generation(
    partial_decode, past=past, attention_mask=batch['attention_mask'], use_cache=model.config.use_cache
)
# Inference only, so skip gradient tracking.
with torch.no_grad():
    model_outputs = model(**model_inputs)

# Keep only the logits at the last position along dimension 1.
next_token_logits = model_outputs[0][:, -1, :]
# Overwrite `past` with the newly returned state for the following step.
past = model_outputs[1]