#### Basic usage of the API

In [1]:
import os

import torch
from pytorch_transformers import *

In [2]:
# PyTorch-Transformers exposes one unified API across
# 6 transformer architectures and 27 pretrained weights.
# Each entry below is a triple:
#   (model class, tokenizer class, pretrained-weights shortcut)
MODELS = [
    (BertModel, BertTokenizer, 'bert-base-uncased'),
    (OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'),
    (GPT2Model, GPT2Tokenizer, 'gpt2'),
    (TransfoXLModel, TransfoXLTokenizer, 'transfo-xl-wt103'),
    (XLNetModel, XLNetTokenizer, 'xlnet-base-cased'),
    (XLMModel, XLMTokenizer, 'xlm-mlm-enfr-1024'),
]

##### Load model and tokenizer

In [3]:
# Encode some text into a sequence of hidden-states with ONE of the models.
# MODELS[4] picks the fifth (model, tokenizer, weights) triple from the list;
# change the index to try a different architecture.
model_class, tokenizer_class, pretrained_weights = MODELS[4]

# Load pretrained model/tokenizer
tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
model = model_class.from_pretrained(pretrained_weights)

# Encode text; the extra [] turns the list of token ids into a batch of size 1
input_ids = torch.tensor([tokenizer.encode("Here is some text to encode")])

# Inference only: disable gradient tracking so no autograd graph is built
with torch.no_grad():
    last_hidden_states = model(input_ids)[0]  # Model outputs are tuples

100%|██████████| 798011/798011 [00:00<00:00, 30327281.31B/s]
100%|██████████| 641/641 [00:00<00:00, 485911.60B/s]
100%|██████████| 467042463/467042463 [00:09<00:00, 49101349.34B/s]


In [5]:
tokenizer_class.from_pretrained??

In [6]:
# Models can return the full list of hidden-states & attention weights at each layer.
# Reload the same weights with both outputs switched on.
model = model_class.from_pretrained(pretrained_weights,
                                    output_hidden_states=True,
                                    output_attentions=True)

# Batch of size 1 holding the token ids of the example sentence
input_ids = torch.tensor([tokenizer.encode("Let's see all hidden-states and attentions on this text")])

# Inference only: disable gradient tracking so no autograd graph is built.
# The last two items of the output tuple are unpacked as the hidden-states
# and the attentions.
with torch.no_grad():
    all_hidden_states, all_attentions = model(input_ids)[-2:]

In [7]:
# Report how many hidden-state tensors came back, then the shape of the first one
# (one value per output line).
print(len(all_hidden_states), all_hidden_states[0].shape, sep='\n')

13
torch.Size([1, 15, 768])


#### Simple serialization for models and tokenizers

In [13]:
# Round-trip the model and the tokenizer through a local directory.
save_model_path = '../model_weights/'

# Create the target directory up front so the save calls below do not depend
# on it already existing (no-op when it is already there).
os.makedirs(save_model_path, exist_ok=True)

model.save_pretrained(save_model_path)  # save
model = model_class.from_pretrained(save_model_path)  # re-load
tokenizer.save_pretrained(save_model_path)  # save
tokenizer = tokenizer_class.from_pretrained(save_model_path)  # re-load

#### Load a finetuned LM model

In [9]:
# Load a fine-tuned BERT model and its tokenizer from a local directory,
# asking for hidden-states & attention weights at each layer.
lm_model_path = '../data/LM_finetune_Macro/finetuned_lm'
tokenizer = BertTokenizer.from_pretrained(lm_model_path)
model_class = BertModel
model = model_class.from_pretrained(lm_model_path,
                                    output_hidden_states=True,
                                    output_attentions=True)

# NOTE(review): the recorded output shows 14 positions for this sentence, which
# suggests encode() is not adding BERT's special tokens ([CLS]/[SEP]) here --
# confirm whether the fine-tuned model expects them.
input_ids = torch.tensor([tokenizer.encode("Let's see all hidden-states and attentions on this text")])

# Inference only: disable gradient tracking so no autograd graph is built.
# The last two items of the output tuple are unpacked as the hidden-states
# and the attentions.
with torch.no_grad():
    all_hidden_states, all_attentions = model(input_ids)[-2:]

In [11]:
# Same summary for the fine-tuned model: the count of hidden-state tensors
# on the first line, the shape of the first tensor on the second.
print("{}\n{}".format(len(all_hidden_states), all_hidden_states[0].shape))

13
torch.Size([1, 14, 768])
