In [1]:
import tensorflow as tf
from transformers import TFAutoModelForCausalLM, AutoTokenizer

In [2]:
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")

In [3]:
model = TFAutoModelForCausalLM.from_pretrained("xlnet-base-cased")

All model checkpoint layers were used when initializing TFXLNetLMHeadModel.

All the layers of TFXLNetLMHeadModel were initialized from the model checkpoint at xlnet-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLNetLMHeadModel for predictions without further training.


In [4]:
tokenizer.vocab_size

32000

In [5]:
vocab = tokenizer.get_vocab()
vocab

{'▁gambler': 23698,
 '▁Gar': 3007,
 'accident': 27056,
 '▁dwarf': 17705,
 '▁Flip': 28192,
 '▁geological': 22690,
 '▁maternal': 18200,
 'built': 12152,
 '▁gather': 6247,
 'ural': 9323,
 'leaning': 22436,
 'ied': 8275,
 '▁determination': 7775,
 'Wolf': 31439,
 'lien': 14343,
 '▁Cy': 8605,
 '▁teasing': 23622,
 'rec': 10235,
 '▁following': 405,
 '▁irre': 15573,
 '▁cheque': 24387,
 '▁provisions': 6025,
 '▁yawn': 26688,
 'both': 13717,
 '▁torment': 20724,
 '▁guard': 2957,
 '▁Ferrer': 28434,
 '▁initiated': 8843,
 'sports': 16464,
 '▁Pang': 25578,
 '▁Glass': 11658,
 ':05': 19947,
 'MDC': 26981,
 '▁Fred': 6621,
 '▁wrong': 1411,
 '▁address': 1131,
 '▁ensuring': 8260,
 '▁Tex': 22786,
 'Friend': 12389,
 '▁icon': 8201,
 '▁accelerate': 13929,
 '▁universe': 6486,
 '▁confused': 6833,
 '▁incumbent': 9771,
 '▁Uribe': 23646,
 '▁Constable': 31432,
 '▁tortoise': 31946,
 'PHOTO': 26455,
 '▁Bang': 12561,
 '▁jagged': 29690,
 '▁lagging': 29409,
 '▁Gainesville': 31212,
 'spir': 7508,
 '▁Di': 1717,
 '▁repatriate

In [7]:
id2word = {i: word for word, i in vocab.items()}

In [8]:
id2word

{23698: '▁gambler',
 3007: '▁Gar',
 27056: 'accident',
 17705: '▁dwarf',
 28192: '▁Flip',
 22690: '▁geological',
 18200: '▁maternal',
 12152: 'built',
 6247: '▁gather',
 9323: 'ural',
 22436: 'leaning',
 8275: 'ied',
 7775: '▁determination',
 31439: 'Wolf',
 14343: 'lien',
 8605: '▁Cy',
 23622: '▁teasing',
 10235: 'rec',
 405: '▁following',
 15573: '▁irre',
 24387: '▁cheque',
 6025: '▁provisions',
 26688: '▁yawn',
 13717: 'both',
 20724: '▁torment',
 2957: '▁guard',
 28434: '▁Ferrer',
 8843: '▁initiated',
 16464: 'sports',
 25578: '▁Pang',
 11658: '▁Glass',
 19947: ':05',
 26981: 'MDC',
 6621: '▁Fred',
 1411: '▁wrong',
 1131: '▁address',
 8260: '▁ensuring',
 22786: '▁Tex',
 12389: 'Friend',
 8201: '▁icon',
 13929: '▁accelerate',
 6486: '▁universe',
 6833: '▁confused',
 9771: '▁incumbent',
 23646: '▁Uribe',
 31432: '▁Constable',
 31946: '▁tortoise',
 26455: 'PHOTO',
 12561: '▁Bang',
 29690: '▁jagged',
 29409: '▁lagging',
 31212: '▁Gainesville',
 7508: 'spir',
 1717: '▁Di',
 27779: '▁rep

In [9]:
sequence = f"Once upon a time, there was "

In [10]:
input_ids = tokenizer.encode(sequence, return_tensors="tf")

In [11]:
input_ids

<tf.Tensor: shape=(1, 9), dtype=int32, numpy=array([[1977,  975,   24,   92,   19,  105,   30,    4,    3]])>

In [12]:
id2word[1977]

'▁Once'

In [13]:
result = model(input_ids)

In [15]:
logits = result[0]
logits

<tf.Tensor: shape=(1, 9, 32000), dtype=float32, numpy=
array([[[-20.825823, -34.659054, -34.448853, ..., -27.784613,
         -30.74444 , -35.80377 ],
        [-22.204262, -37.775707, -37.351646, ..., -30.71046 ,
         -29.529606, -36.709103],
        [-25.660423, -41.250134, -40.92931 , ..., -33.947018,
         -31.243872, -37.114754],
        ...,
        [-19.05976 , -33.74293 , -33.5181  , ..., -25.464396,
         -26.773752, -34.887245],
        [-16.84529 , -33.212406, -32.902584, ..., -25.412714,
         -25.436672, -33.201523],
        [-14.362121, -31.215874, -30.875034, ..., -23.443584,
         -22.542244, -30.336607]]], dtype=float32)>

In [16]:
logits.shape

TensorShape([1, 9, 32000])

In [17]:
next_token_logits = logits[:, -1, :]