### Practice for `convert_bert_original_tf_checkpoint_to_pytorch.py`
First, download the pre-trained model (TensorFlow version) from Google (https://github.com/google-research/bert#bert).  \
I downloaded the *bert-small* version; it contains `bert_model.ckpt` (`bert_model.ckpt.data-...`, `bert_model.ckpt.index`), `vocab.txt`, and `bert_config.json`.

In [30]:
import torch
from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert

#### Initialize PyTorch model

In [23]:
# Build a BertForPreTraining model whose architecture comes from the JSON config
# shipped with the TF checkpoint; the checkpoint weights are loaded in a later cell.
BERT_CONFIG_FILE = 'bert-small/bert_config.json'
config = BertConfig.from_json_file(BERT_CONFIG_FILE)
print(f"Building PyTorch model from configuration: {config!s}")
model = BertForPreTraining(config)

Building PyTorch model from configuration: BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 512,
  "initializer_range": 0.02,
  "intermediate_size": 2048,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 8,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 30522
}



#### Load weights from tf checkpoint

In [29]:
# Copy the variables stored in the TensorFlow checkpoint into `model`.
# The path is the checkpoint prefix (no `.index` / `.data-...` suffix).
TF_CHECKPOINT_PATH = 'bert-small/bert_model.ckpt'
# The call's return value is the cell's last expression, so it is what gets displayed.
load_tf_weights_in_bert(
    model=model,
    config=config,
    tf_checkpoint_path=TF_CHECKPOINT_PATH,
)

BertForPreTraining(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 512, padding_idx=0)
      (position_embeddings): Embedding(512, 512)
      (token_type_embeddings): Embedding(2, 512)
      (LayerNorm): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=512, out_features=512, bias=True)
              (key): Linear(in_features=512, out_features=512, bias=True)
              (value): Linear(in_features=512, out_features=512, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=512, out_features=512, bias=True)
              (LayerNorm): LayerNorm((512,), eps=1e-12, elementwise_affine

#### Save the PyTorch model

In [39]:
# Serialize only the model's state dict (its parameters/buffers) to disk.
PYTORCH_DUMP_PATH = 'bert-small/pytorch-bert-small.bin'
print(f"Save PyTorch model to {PYTORCH_DUMP_PATH}")
torch.save(model.state_dict(), PYTORCH_DUMP_PATH)

Save PyTorch model to bert-small/pytorch-bert-small.bin


<br/>

<br/>

---

<br/>

#### Re-check: load the pre-trained model in PyTorch
Rename the files to `vocab.txt`, `pytorch_model.bin` (converted from the `bert_model.ckpt` checkpoint), and `config.json`, and place them in the `pytorch-bert-small/` directory.

In [73]:
# Locations of the renamed artifacts used by the re-check section.
BERT_CONFIG_FILE = "pytorch-bert-small/config.json"
BERT_BIN_FILE = "pytorch-bert-small/pytorch_model.bin"
OUTPUT_VOCAB_FILE = "pytorch-bert-small/vocab.txt"

In [62]:
# Recreate the model architecture from the config file named in BERT_CONFIG_FILE.
config = BertConfig.from_json_file(BERT_CONFIG_FILE)
model = BertForPreTraining(config)

In [72]:
# `from_pretrained` is a classmethod that builds and returns a NEW model; calling
# it on the existing instance and discarding the result left `model` unchanged.
# Bind the returned model so the weights loaded from the directory are the ones
# used by later cells.
model = BertForPreTraining.from_pretrained('pytorch-bert-small')
# Keep the model as the last expression so the cell still displays its structure.
model

BertForPreTraining(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 512, padding_idx=0)
      (position_embeddings): Embedding(512, 512)
      (token_type_embeddings): Embedding(2, 512)
      (LayerNorm): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=512, out_features=512, bias=True)
              (key): Linear(in_features=512, out_features=512, bias=True)
              (value): Linear(in_features=512, out_features=512, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=512, out_features=512, bias=True)
              (LayerNorm): LayerNorm((512,), eps=1e-12, elementwise_affine

In [50]:
# Read the saved state dict from PYTORCH_DUMP_PATH and copy it into `model`;
# the result of load_state_dict is the cell's displayed output.
model.load_state_dict(
    state_dict=torch.load(f=PYTORCH_DUMP_PATH),
)

<All keys matched successfully>

In [55]:
from transformers import BertTokenizer

# Use the OUTPUT_VOCAB_FILE constant already defined with this section's other
# paths. Rebinding it here to a different directory silently overrode that
# constant and made its value depend on which cell ran last.
tokenizer = BertTokenizer(vocab_file=OUTPUT_VOCAB_FILE, do_lower_case=True)