In [3]:
#Import Libraries
import tensorflow as tf
import transformers
from numpy import random
from transformers import (TFGPT2LMHeadModel,
                          GPT2Tokenizer,
                          GPT2Config)

In [4]:
print("tf version={}".format(tf.__version__))
print("huggingface/transformer version={}".format(transformers.__version__))

tf version=2.11.0
huggingface/transformer version=4.26.1


In [5]:
#Model Setup
model_name = "gpt2-medium"
config = GPT2Config.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = TFGPT2LMHeadModel.from_pretrained(model_name, config=config)

Downloading (…)lve/main/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)"tf_model.h5";:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2-medium.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [6]:
#Generate Text
# Encode context the generation is conditioned on
input_ids = tokenizer.encode('Watson you are', return_tensors='tf')
input_ids

<tf.Tensor: shape=(1, 4), dtype=int32, numpy=array([[   54, 13506,   345,   389]])>

In [7]:
#Greedy Decoding

# generate text until the output length (which includes the context length) reaches 50
greedy_output = model.generate(input_ids, max_length=20)

print("Output:\n" + 110 * '-')
print(tokenizer.decode(greedy_output[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
--------------------------------------------------------------------------------------------------------------
Watson you are a great guy and I hope you are doing well. I am sorry to hear


In [8]:
#Sampled Decoding

tf.random.set_seed(0)

# Use a combination of decoding techniques
sample_outputs = model.generate(
    input_ids,
    do_sample=True, 
    max_length=20, 
    top_k=50, 
    top_p=0.95, 
    num_return_sequences=3
)

print("Output:\n" + 110 * '-')
for i, sample_output in enumerate(sample_outputs):
    print("{}: {}".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))
    print("-"*110)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
--------------------------------------------------------------------------------------------------------------
0: Watson you are so beautiful. You are just amazing." He put her head down and smiled.
--------------------------------------------------------------------------------------------------------------
1: Watson you are wrong, you will never understand anyone. People that do not understand will learn nothing
--------------------------------------------------------------------------------------------------------------
2: Watson you are an excellent person. I have always followed you on twitter and would like to know
--------------------------------------------------------------------------------------------------------------
