# Masked Language Modeling

## Importing the libraries

In [28]:
import numpy as np
import torch
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForMaskedLM

## Defining the model name and loading the tokenizer and model

In [38]:
# Checkpoint identifier shared by both from_pretrained calls below, so the
# tokenizer and the model are always loaded from the same checkpoint.
model_name = "bert-base-cased"
# Tokenizer matching the checkpoint; it also supplies the mask token used later.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Same checkpoint loaded with a masked-language-modeling head.
model = AutoModelForMaskedLM.from_pretrained(model_name)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Creating the masked sentence and running the model

In [50]:
# The mask placeholder is tokenizer-specific, so ask the tokenizer for it
# instead of hard-coding the string.
mask = tokenizer.mask_token
print(mask)

sentence = f"How many people can you fit inside of a {mask}?"

# Plain token strings for the sentence, WITHOUT the special start/end tokens.
tokens = tokenizer.tokenize(sentence)

# Model-ready inputs as PyTorch tensors; unlike `tokens`, these include the
# special start ([CLS]) and end ([SEP]) tokens.
encoded_inputs = tokenizer(sentence, return_tensors="pt")

# Inference only: disable gradient tracking so no autograd graph is built.
with torch.no_grad():
    outputs = model(**encoded_inputs)

# Drop the batch dimension (a single sentence was passed) and convert to NumPy.
logits = outputs.logits[0].numpy()
# logits holds, for every token position in the input, one raw (unnormalized)
# score per vocabulary entry; a higher score means the model considers that
# vocabulary token a better fit at that position.
print(logits.shape)
# The first number is the number of input tokens: 11 from the sentence
# (including the mask and the "?") plus the special start and end tokens = 13.
# The second number is the total vocabulary size of the model.

[MASK]
(13, 28996)


## Predict the most likely token to replace the masked token

In [51]:
# Locate the masked position directly in the encoded ids, so the lookup does
# not depend on how many special tokens the tokenizer prepends to the sentence.
input_ids = encoded_inputs["input_ids"][0].tolist()
mask_position = input_ids.index(tokenizer.mask_token_id)

# Scores over the whole vocabulary for the masked position only.
mask_logits = logits[mask_position]
# softmax turns the raw scores into probabilities that sum to 1.
confidence_scores = softmax(mask_logits)

# Number of candidate replacements to display.
top_k = 5

# argsort is ascending, so reverse it to walk from most to least likely token.
for token_id in np.argsort(confidence_scores)[::-1][:top_k]:
    # Cast the NumPy integer to a plain int before handing it to the tokenizer.
    pred_token = tokenizer.decode(int(token_id))
    score = confidence_scores[token_id]
    print(sentence.replace(mask, pred_token), score)

How many people can you fit inside of a car? 0.17404005
How many people can you fit inside of a house? 0.14402431
How many people can you fit inside of a room? 0.055196967
How many people can you fit inside of a building? 0.051325884
How many people can you fit inside of a box? 0.028974824
