In [1]:
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM

# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import logging
logging.basicConfig(level=logging.INFO)

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenized input: two sentences already wrapped in the [CLS]/[SEP] special tokens
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)

# Mask a token that we will try to predict back with `BertForMaskedLM`
masked_index = 8
# Make sure the hard-coded position really is the token we intend to hide;
# this fails loudly if `text` is edited and the index is not updated.
assert tokenized_text[masked_index] == 'henson'
tokenized_text[masked_index] = '[MASK]'
assert tokenized_text == ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer', '[SEP]']

# Convert token to vocabulary indices
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Define sentence A and B indices associated to 1st and 2nd sentences (see paper).
# Everything up to and including the first [SEP] is sentence A (0); the rest is
# sentence B (1). Deriving this from the tokens keeps it aligned with `text`.
first_sep_index = tokenized_text.index('[SEP]')
segments_ids = [0] * (first_sep_index + 1) + [1] * (len(tokenized_text) - first_sep_index - 1)
assert len(segments_ids) == len(indexed_tokens)

# Convert inputs to PyTorch tensors; the outer list adds a batch dimension of 1
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at C:\Users\pedro\.pytorch_pretrained_bert\26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


In [2]:
# Load pre-trained model (weights) and put it in evaluation mode
model = BertModel.from_pretrained('bert-base-uncased')
model.eval()

# Use the GPU when one is available; otherwise stay on the CPU so this cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokens_tensor = tokens_tensor.to(device)
segments_tensors = segments_tensors.to(device)
model.to(device)

# Predict hidden states features for each layer (no gradients needed for inference)
with torch.no_grad():
    encoded_layers, _ = model(tokens_tensor, segments_tensors)
# We have one hidden-state tensor for each of the 12 layers in model bert-base-uncased
assert len(encoded_layers) == 12

INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at C:\Users\pedro\.pytorch_pretrained_bert\9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
INFO:pytorch_pretrained_bert.modeling:extracting archive file C:\Users\pedro\.pytorch_pretrained_bert\9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir C:\Users\pedro\AppData\Local\Temp\tmpebij7_e3
INFO:pytorch_pretrained_bert.modeling:Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



In [5]:
# Display the hidden states returned by BertModel: a list with one tensor per layer
encoded_layers

[tensor([[[ 0.1779, -0.1300, -0.1322,  ..., -0.0523,  0.0398,  0.1696],
          [ 0.0015,  0.6659, -0.2717,  ...,  0.0858,  0.0752, -0.7998],
          [-0.3025, -0.7966,  0.2017,  ..., -0.2160,  0.1442,  0.1139],
          ...,
          [ 0.9641, -0.2486, -0.5867,  ..., -0.2504, -0.1085,  0.9230],
          [ 1.0382,  0.2008, -0.5151,  ...,  0.2307, -0.2571,  0.0853],
          [-0.1584,  0.2854,  0.1234,  ...,  0.0381,  0.6913,  0.1684]]],
        device='cuda:0'),
 tensor([[[ 0.0990, -0.0730, -0.1330,  ..., -0.1048, -0.0753,  0.0759],
          [-0.3714,  0.7424, -0.4032,  ...,  0.3164, -0.2033, -1.2496],
          [-0.2573, -0.0635,  0.0321,  ..., -0.4019,  0.1892, -0.4282],
          ...,
          [ 0.9812,  0.2636, -0.0527,  ..., -0.1582,  0.2423,  0.7502],
          [ 1.3446,  0.3007, -0.3358,  ...,  0.4241, -0.2855, -0.5207],
          [-0.2775,  0.2715,  0.2500,  ...,  0.0594,  0.5329,  0.0695]]],
        device='cuda:0'),
 tensor([[[-4.0789e-02,  4.4493e-02, -2.2129e-02, 

In [3]:
# Load pre-trained model (weights) with the masked-language-modeling head
# and put it in evaluation mode
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

# Use the GPU when one is available; otherwise stay on the CPU so this cell
# still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokens_tensor = tokens_tensor.to(device)
segments_tensors = segments_tensors.to(device)
model.to(device)

# Predict all tokens (no gradients needed for inference)
with torch.no_grad():
    predictions = model(tokens_tensor, segments_tensors)

# confirm we were able to predict 'henson':
# take the highest-scoring vocabulary id at the masked position of the first sequence
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
assert predicted_token == 'henson'

INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at C:\Users\pedro\.pytorch_pretrained_bert\9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
INFO:pytorch_pretrained_bert.modeling:extracting archive file C:\Users\pedro\.pytorch_pretrained_bert\9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir C:\Users\pedro\AppData\Local\Temp\tmpqrp8vwv6
INFO:pytorch_pretrained_bert.modeling:Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 30522
}

INFO:pytorch_pretrained_b

In [7]:
# Show the token the masked-LM model predicted for the masked position
predicted_token

'henson'

In [10]:
# Number of per-layer hidden-state tensors returned by BertModel
len(encoded_layers)

12

In [24]:
# Length of the vector at layer index 11 (the last of the 12), first sequence
# in the batch, token position 8 (the position that was replaced by [MASK])
len(encoded_layers[11][0][8])

768

In [39]:
# Display the vector at layer index 11 (the last of the 12), first sequence
# in the batch, token position 8 (the position that was replaced by [MASK])
encoded_layers[11][0][8]

tensor([ 3.3325e-01,  1.6397e-01,  2.1220e-01,  2.4581e-01,  3.5942e-01,
        -1.8511e-01, -8.9237e-02,  5.9655e-01, -2.3936e-02,  4.3561e-01,
        -6.5029e-02, -2.2378e-02, -7.8029e-02,  8.6534e-02, -3.9485e-01,
         2.8866e-01,  4.2361e-01,  2.6790e-02, -2.1295e-01, -5.3584e-01,
         2.5959e-01, -2.5565e-01,  2.1842e-01, -1.5863e-01,  5.7914e-01,
         6.2407e-01, -1.3453e-01,  2.0321e-01, -1.3450e-01,  1.3699e-01,
         2.9377e-01, -6.2752e-01,  2.2628e-01, -5.2016e-01, -1.1211e-01,
         3.5283e-01, -1.1038e-02, -2.6902e-01, -3.1812e-01,  8.3966e-02,
        -6.7615e-01,  1.4772e-02,  4.8851e-02,  5.5387e-01,  1.0909e-01,
        -8.9501e-02, -1.3482e-01, -2.9250e-01, -1.5335e-01, -2.5051e-01,
        -6.2847e-01, -2.7293e-01, -2.7769e-01, -2.2177e-02,  8.9669e-02,
        -2.0880e-01,  1.2952e-01,  4.5479e-03,  5.3365e-01, -2.0591e-01,
         1.1390e-01, -1.4905e-01, -2.7777e-02, -4.7949e-01, -3.4620e-01,
        -1.0370e-01,  4.0764e-01,  2.3562e-01,  1.6