In [1]:
import torch
import pytorch_transformers

In [2]:
print("Pytorch version:     ", torch.__version__)
print("Transformers version:", pytorch_transformers.__version__)
print("")
print("GPU available:", torch.cuda.is_available())
print("GPU name:     ", torch.cuda.get_device_name(0))

Pytorch version:      1.2.0
Transformers version: 1.2.0

GPU available: True
GPU name:      GeForce GTX 1080 Ti


### Model: Transformer-XL

In [2]:
tokenizer = pytorch_transformers.TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
model     = pytorch_transformers.TransfoXLModel.from_pretrained('transfo-xl-wt103')
modelLM   = pytorch_transformers.TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')

### Model: GPT-2

In [3]:
tokenizer = pytorch_transformers.GPT2Tokenizer.from_pretrained('gpt2')
model     = pytorch_transformers.GPT2Model.from_pretrained('gpt2')
modelLM   = pytorch_transformers.GPT2LMHeadModel.from_pretrained('gpt2')

### Model: BERT

In [3]:
tokenizer = pytorch_transformers.BertTokenizer.from_pretrained('bert-base-uncased')
model     = pytorch_transformers.BertModel.from_pretrained('bert-base-uncased')
modelLM   = pytorch_transformers.BertForMaskedLM.from_pretrained('bert-base-uncased')

### Model: BERT multilingual

In [3]:
tokenizer = pytorch_transformers.BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model     = pytorch_transformers.BertModel.from_pretrained('bert-base-multilingual-cased')
modelLM   = pytorch_transformers.BertForMaskedLM.from_pretrained('bert-base-multilingual-cased')

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 995526/995526 [00:00<00:00, 1178839.72B/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 521/521 [00:00<00:00, 261297.67B/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 714314041/714314041 [01:29<00:00, 7957622.98B/s]


### Model: XLM multilingual

In [3]:
tokenizer = pytorch_transformers.XLMTokenizer.from_pretrained('xlm-mlm-xnli15-1024')
model     = pytorch_transformers.XLMModel.from_pretrained('xlm-mlm-xnli15-1024')
modelLM   = pytorch_transformers.XLMWithLMHeadModel.from_pretrained('xlm-mlm-xnli15-1024')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 2952532/2952532 [00:01<00:00, 2579497.52B/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 1434601/1434601 [00:00<00:00, 1625326.66B/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 775/775 [00:00<00:00, 88552.51B/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1385437546/1385437546 [05:15<00:00, 4385069.04B/s]


### Model: RoBERTa

In [3]:
tokenizer = pytorch_transformers.RobertaTokenizer.from_pretrained('roberta-base')
model     = pytorch_transformers.RobertaModel.from_pretrained('roberta-base')
modelLM   = pytorch_transformers.RobertaForMaskedLM.from_pretrained('roberta-base')

### Model: DistilBERT

In [2]:
tokenizer = pytorch_transformers.DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model     = pytorch_transformers.DistilBertModel.from_pretrained('distilbert-base-uncased')
modelLM   = pytorch_transformers.DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased')

# Input text

In [10]:
text = "I have two dogs and one dog is a puppeteer"
text = "Me llamo Juan y me gustan las"
text = "Me gustan los tacos, y te quiero"

tokens_text       = tokenizer.tokenize(text)
tokens_ids        = tokenizer.convert_tokens_to_ids(tokens_text)
tokens_ids_tensor = torch.tensor([tokens_ids])
tokens_text, tokens_ids

(['ĠMe', 'Ġgust', 'an', 'Ġlos', 'Ġtacos', ',', 'Ġy', 'Ġte', 'Ġqu', 'ier', 'o'],
 [2185, 35253, 272, 22346, 44058, 11, 331, 573, 627, 959, 78])

### Model output 

In [11]:
print("vertor size:", model.config.hidden_size)

vertor size: 768


In [12]:
model.eval()
outputs = model(tokens_ids_tensor)[0]
outputs.shape

torch.Size([1, 11, 768])

### Let's do some next token prediction

In [13]:
model.eval()
predictions = model(tokens_ids_tensor)[0]
predictions.shape

torch.Size([1, 11, 768])

In [14]:
predicted_index = torch.argmax(predictions[0, -1, :]).item()
predicted_index

496

In [15]:
predicted_text = tokenizer.decode([predicted_index])
predicted_text

'age'

In [17]:
text      = "I have two dogs and one is a"
input     = torch.tensor([tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))])
input

tensor([[1045, 2031, 2048, 6077, 1998, 2028, 2003, 1037]])

In [21]:
################################################# OLD
text      = "I have two dogs and one is a"
input     = torch.tensor([tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))])
next_word = torch.argmax(model(input)[0][0, -1])
tokenizer.convert_ids_to_tokens([next_word.item()])[0]

'puppy'