In [1]:
import numpy as np
import tensorflow as tf
import logging

# Logger used for the progress messages emitted while reading the checkpoint.
# NOTE(review): no logging configuration is visible in this notebook, so the
# INFO records may not be displayed unless a handler/level is set elsewhere.
logger = logging.getLogger(__name__)

In [2]:
# Checkpoint prefix of the TensorFlow model that is being converted.
tf_path = 'alxlnet-base-2020-04-10/model.ckpt-300000'
# (name, shape) pairs for every variable stored in the checkpoint.
init_vars = tf.compat.v1.train.list_variables(tf_path)

# Read every checkpoint variable into memory, keyed by its TF variable name.
tf_weights = {}
for name, shape in init_vars:
    # Hand the values to the logger as arguments so the message is only
    # formatted when an INFO handler is actually active.
    logger.info("Loading TF weight %s with shape %s", name, shape)
    array = tf.compat.v1.train.load_variable(tf_path, name)
    tf_weights[name] = array

In [3]:
# Display the shape of the `lookup_table` word-embedding variable from the checkpoint.
tf_weights['model/transformer/word_embedding/lookup_table'].shape

(32000, 128)

In [4]:
# Display the shape of the second embedding variable, `lookup_table_2`.
# NOTE(review): presumably the projection from the small embedding size up to
# the model width (factorized embedding) — confirm against modeling_alxlnet.
tf_weights['model/transformer/word_embedding/lookup_table_2'].shape

(128, 768)

In [5]:
# Display the shape of the LM-loss bias variable stored in the checkpoint.
tf_weights['model/lm_loss/bias'].shape

(32000,)

In [6]:
# List every variable name that was read from the checkpoint. The listing also
# contains entries such as `.../Adam` and `.../Adam_1` (presumably optimizer
# state saved alongside the model weights).
tf_weights.keys()

dict_keys(['beta1_power', 'beta2_power', 'global_step', 'model/lm_loss/bias', 'model/lm_loss/bias/Adam', 'model/lm_loss/bias/Adam_1', 'model/transformer/layer_shared/ff/LayerNorm/beta', 'model/transformer/layer_shared/ff/LayerNorm/beta/Adam', 'model/transformer/layer_shared/ff/LayerNorm/beta/Adam_1', 'model/transformer/layer_shared/ff/LayerNorm/gamma', 'model/transformer/layer_shared/ff/LayerNorm/gamma/Adam', 'model/transformer/layer_shared/ff/LayerNorm/gamma/Adam_1', 'model/transformer/layer_shared/ff/layer_1/bias', 'model/transformer/layer_shared/ff/layer_1/bias/Adam', 'model/transformer/layer_shared/ff/layer_1/bias/Adam_1', 'model/transformer/layer_shared/ff/layer_1/kernel', 'model/transformer/layer_shared/ff/layer_1/kernel/Adam', 'model/transformer/layer_shared/ff/layer_1/kernel/Adam_1', 'model/transformer/layer_shared/ff/layer_2/bias', 'model/transformer/layer_shared/ff/layer_2/bias/Adam', 'model/transformer/layer_shared/ff/layer_2/bias/Adam_1', 'model/transformer/layer_shared/ff/

In [7]:
from transformers import XLNetConfig

# Load the model configuration from the JSON file that sits in the same
# directory as the TF checkpoint.
config = XLNetConfig.from_json_file('alxlnet-base-2020-04-10/config.json')

In [8]:
from modeling_alxlnet import XLNetLMHeadModel, load_tf_weights_in_xlnet

In [9]:
# Build the PyTorch LM-head model from the loaded config; the TF checkpoint is
# loaded into it by the `load_tf_weights_in_xlnet` call that follows.
model = XLNetLMHeadModel(config)

In [10]:
# Load the variables found at `tf_path` into `model`. The call's return value
# is the cell output; judging by the repr shown, it is the model itself.
load_tf_weights_in_xlnet(model, config, tf_path)

XLNetLMHeadModel(
  (transformer): XLNetModel(
    (word_embedding): Embedding(32000, 128)
    (word_embedding2): Embedding(128, 768)
    (layer): ModuleList(
      (0): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_no

In [15]:
!mkdir alxlnet-base

In [16]:
from transformers import CONFIG_NAME, WEIGHTS_NAME
import os
import torch

# Destination paths inside the output directory, using the file names that
# transformers exports as WEIGHTS_NAME / CONFIG_NAME.
pytorch_weights_dump_path = os.path.join('alxlnet-base', WEIGHTS_NAME)
pytorch_config_dump_path = os.path.join('alxlnet-base', CONFIG_NAME)
# Ensure the output directory exists so this cell does not depend on the
# separate `mkdir` cell having been executed first.
os.makedirs(os.path.dirname(pytorch_weights_dump_path), exist_ok=True)
# Persist the model's state dict (parameter tensors) to the weights file.
torch.save(model.state_dict(), pytorch_weights_dump_path)

In [17]:
# Write the configuration as JSON text to the config path computed earlier.
with open(pytorch_config_dump_path, mode="w", encoding="utf-8") as config_file:
    serialized_config = config.to_json_string()
    config_file.write(serialized_config)

In [19]:
from modeling_alxlnet import XLNetModel
from transformers import XLNetTokenizer

In [20]:
# Build the tokenizer from the local vocabulary model file, without lower-casing.
tokenizer = XLNetTokenizer(
    'sp10m.cased.v9.model',
    do_lower_case=False,
)
# Export the tokenizer files into the output directory; the returned file
# paths are displayed as the cell output.
tokenizer.save_pretrained('alxlnet-base')

('alxlnet-base/spiece.model',
 'alxlnet-base/special_tokens_map.json',
 'alxlnet-base/added_tokens.json')

In [21]:
# Reload the tokenizer from the exported directory; this rebinds `tokenizer`,
# replacing the instance that was built directly from the vocabulary file.
tokenizer = XLNetTokenizer.from_pretrained('./alxlnet-base', do_lower_case = False)

In [22]:
# Re-read the same config JSON that was loaded earlier in the notebook and
# display it as the cell output.
config = XLNetConfig.from_json_file('alxlnet-base-2020-04-10/config.json')
config

XLNetConfig {
  "attn_type": "bi",
  "bi_data": false,
  "bos_token_id": 1,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "eos_token_id": 2,
  "ff_activation": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "model_type": "xlnet",
  "n_head": 12,
  "n_layer": 12,
  "pad_token_id": 5,
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,
  "summary_activation": "tanh",
  "summary_last_dropout": 0.1,
  "summary_type": "last",
  "summary_use_proj": true,
  "untie_r": true,
  "vocab_size": 32000
}

In [23]:
# Load the exported directory into the bare XLNetModel. Note that `model` is
# rebound here: it no longer refers to the XLNetLMHeadModel created earlier.
model = XLNetModel.from_pretrained('./alxlnet-base', config = config)

In [24]:
# Encode one sample sentence (special tokens included) and wrap the id list in
# an outer list so the resulting tensor has a leading batch dimension of 1.
sample_text = "husein tk suka mkan ayam"
token_ids = tokenizer.encode(sample_text, add_special_tokens=True)
input_ids = torch.tensor([token_ids])

In [32]:
# Sanity-check forward pass on the sample batch with an all-ones attention
# mask (every position attended). Gradients are not needed for this check, so
# autograd is disabled to avoid building a graph for the outputs.
with torch.no_grad():
    outputs = model(input_ids, attention_mask = torch.ones(input_ids.size()))
# Last expression of the cell: display the model outputs.
outputs

(tensor([[[ 0.0969,  0.7955, -0.3110,  ..., -0.4857,  1.0827, -0.8285],
          [ 0.5202,  0.3320, -0.2579,  ..., -1.6332, -0.8103, -1.0283],
          [-1.2109,  0.0866, -0.7792,  ..., -0.5070, -1.6178, -1.2713],
          ...,
          [-0.9953,  0.7751, -0.7880,  ..., -0.7724, -0.2612,  0.6644],
          [ 0.3525, -0.6217, -1.0145,  ..., -1.4954, -1.7150, -0.2340],
          [-0.2612, -0.4044, -0.7142,  ...,  0.2696, -0.6073,  0.5948]]],
        grad_fn=<PermuteBackward>),)