In [24]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM

In [9]:
# Hub identifier of the pretrained model; reused by the pipeline, tokenizer and model cells below.
checkpoint = "camembert-base"

In [11]:
# Example sentence containing the literal "<mask>" placeholder the model is asked to fill in.
sequence1 = "Le camembert est <mask> :)"

# Pipeline

In [12]:
# Build a ready-to-use "fill-mask" pipeline from the checkpoint.
camembert_fill_mask = pipeline("fill-mask", model=checkpoint)

Some weights of the model checkpoint at camembert-base were not used when initializing CamembertForMaskedLM: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing CamembertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing CamembertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [13]:
# Run the pipeline on the masked sentence.
results = camembert_fill_mask(sequence1)

In [14]:
# Display the pipeline's predictions (score, token id, token string, filled sequence).
results

[{'score': 0.4909103214740753,
  'token': 7200,
  'token_str': 'délicieux',
  'sequence': 'Le camembert est délicieux :)'},
 {'score': 0.10556960850954056,
  'token': 2183,
  'token_str': 'excellent',
  'sequence': 'Le camembert est excellent :)'},
 {'score': 0.034533217549324036,
  'token': 26202,
  'token_str': 'succulent',
  'sequence': 'Le camembert est succulent :)'},
 {'score': 0.0330313965678215,
  'token': 528,
  'token_str': 'meilleur',
  'sequence': 'Le camembert est meilleur :)'},
 {'score': 0.03007640317082405,
  'token': 1654,
  'token_str': 'parfait',
  'sequence': 'Le camembert est parfait :)'}]

# Tokenizer & model

In [15]:
# Load the tokenizer and the masked-LM model separately from the same checkpoint,
# to reproduce by hand what the pipeline above does.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForMaskedLM.from_pretrained(checkpoint)

Some weights of the model checkpoint at camembert-base were not used when initializing CamembertForMaskedLM: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing CamembertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing CamembertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [25]:
# Tokenize the sentence into PyTorch tensors ("pt").
prepped_input = tokenizer(sequence1, return_tensors="pt")
# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    output = model(**prepped_input)

## Convert raw predictions into words

In [77]:
def pred_top_k_words(logits, input_ids, k=5):
    """Return the ``k`` best-scoring words for the masked position.

    Uses the notebook-level ``tokenizer`` both to find the mask token id and
    to decode the predicted token ids back into text.

    Args:
        logits: Model output scores. Only ``logits[0]`` is read, so a
            single-sequence batch is assumed.
        input_ids: Token ids that were fed to the model, indexed as a 2-D
            tensor (the second axis is the position in the sequence).
        k: Number of candidate words to return. Defaults to 5. This used to
            be read from a variable that was never defined in the notebook,
            so the function failed on a fresh kernel.

    Returns:
        A list of ``k`` decoded strings, highest score first. If the input
        contains several mask tokens, only the first one is used.
    """
    # torch.where returns one index tensor per axis; [1] is the position axis.
    mask_token_idx = torch.where(input_ids == tokenizer.mask_token_id)[1]
    # One row of vocabulary scores per mask position found.
    mask_token_logits = logits[0, mask_token_idx]
    # Keep only the first mask position's top-k token ids.
    top_k_tokens = torch.topk(mask_token_logits, k, dim=-1).indices[0].tolist()
    top_k_words = [tokenizer.decode([token]) for token in top_k_tokens]
    return top_k_words

In [78]:
# Turn the raw logits from the forward pass into candidate words for the mask.
top_k_words = pred_top_k_words(output.logits, prepped_input.input_ids)

In [79]:
# Show every candidate in context by substituting it for the mask placeholder.
mask_token = tokenizer.mask_token
for word in top_k_words:
    filled_sequence = sequence1.replace(mask_token, word)
    print(filled_sequence)

Le camembert est délicieux :)
Le camembert est excellent :)
Le camembert est succulent :)
Le camembert est meilleur :)
Le camembert est parfait :)
