In [13]:
import transformers

# load the pretrained GPT-2 tokenizer
tokenizer = transformers.GPT2Tokenizer.from_pretrained("gpt2")

# load the LM-head model; reusing the EOS token id as PAD token id avoids warnings
model = transformers.GPT2LMHeadModel.from_pretrained(
    "gpt2",
    pad_token_id=tokenizer.eos_token_id,
)

# encode the prompt the generation is conditioned on ('pt' requests tensor output)
context = 'I enjoy walking with my cute dog'
input_ids = tokenizer.encode(context, return_tensors='pt')

# generate with default decoding options until prompt + continuation
# together reach 50 tokens
greedy_output = model.generate(input_ids, max_length=50)

# print a header followed by the first (only) generated sequence
print("Output:\n" + "-" * 100)
greedy_text = tokenizer.decode(greedy_output[0], skip_special_tokens=True)
print(greedy_text)

# options shared by both beam-search runs below: 5 beams, early stopping,
# total length (prompt included) capped at 50 tokens
beam_kwargs = dict(max_length=50, num_beams=5, early_stopping=True)

# plain beam search
beam_output = model.generate(input_ids, **beam_kwargs)

print("\nOutput:\n" + "-" * 100)
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))

# same beam search, additionally with no_repeat_ngram_size set to 2
# so that no 2-gram may occur more than once in the output
beam_output = model.generate(input_ids, no_repeat_ngram_size=2, **beam_kwargs)

print("\nOutput:\n" + "-" * 100)
print(tokenizer.decode(beam_output[0], skip_special_tokens=True))

# set num_return_sequences > 1 to get several beams back instead of only the
# best one (num_return_sequences must not exceed num_beams)
beam_outputs = model.generate(
    input_ids,
    max_length=50,
    num_beams=5,
    no_repeat_ngram_size=2,
    num_return_sequences=5,
    early_stopping=True,
)

# beam_outputs now holds 5 output sequences (one per requested return
# sequence); decode and print each one prefixed with its index
print("\nOutput:\n" + 100 * '-')
for i, beam_output in enumerate(beam_outputs):
    print("{}: {}".format(i, tokenizer.decode(beam_output, skip_special_tokens=True)))

# seed the random number generators so the sampled continuations below are
# reproducible when the cell is re-run from a fresh kernel
transformers.set_seed(0)

# activate sampling and deactivate top_k by setting top_k sampling to 0
sample_output = model.generate(
    input_ids,
    do_sample=True,
    max_length=50,
    top_k=0,
)

print("\nOutput:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

# use temperature to decrease the sensitivity to low probability candidates
sample_output = model.generate(
    input_ids,
    do_sample=True,
    max_length=50,
    top_k=0,
    temperature=0.7,
)

print("\nOutput:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

# set top_k to 50 so sampling is restricted to the 50 most likely next tokens
sample_output = model.generate(
    input_ids,
    do_sample=True,
    max_length=50,
    top_k=50,
)

print("\nOutput:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with my dog. I'm not sure if I'll ever be able to walk with my dog.

I'm not sure if I'll

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I'm not sure if I'll ever be able to walk with him again. I'm not sure if I'll

Output:
----------------------------------------------------------------------------------------------------
I enjoy walking with my cute dog, but I'm not sure if I'll ever be able to walk with him again.

I've been thinking about this for a while now, and I think it's time for me to take a break

Output:
----------------------------------------------------------------------------------------------------
0: I enjoy walking with my

In [17]:
import transformers

# load the pretrained GPT-2 tokenizer
tokenizer = transformers.GPT2Tokenizer.from_pretrained("gpt2")

# load the LM-head model; reusing the EOS token id as PAD token id avoids warnings
model = transformers.GPT2LMHeadModel.from_pretrained(
    "gpt2",
    pad_token_id=tokenizer.eos_token_id,
)

# encode a sample context as a tensor of token ids
# NOTE(review): the generate() call visible later in this cell is fed `inputs`
# (built from `prompt`), not `input_ids` -- confirm whether this is still needed
context = 'I enjoy walking with my cute dog'
input_ids = tokenizer.encode(context, return_tensors='pt')

def _maybe_add(args, key, val):
    if val is not None:
        args[key] = val

def _required(key, val):
    if not val:
        raise TypeError(f"{key} can't be {val}")

# Keyword arguments for model.generate(). Each option is assigned to a variable
# and then passed through _maybe_add, which leaves the key out of `args`
# entirely when the value is None.
args = {}

# seed the random number generators so the sampled text is reproducible
# when the cell is re-run from a fresh kernel
seed = 0
transformers.set_seed(seed)

# the prompt is mandatory: _required raises TypeError if it is empty or None
prompt = "This is the default prompt"
_required("prompt", prompt)
inputs = tokenizer.encode(prompt, return_tensors='pt')
_maybe_add(args, "inputs", inputs)

do_sample = True
_maybe_add(args, "do_sample", do_sample)

early_stopping = True
_maybe_add(args, "early_stopping", early_stopping)

max_length = 50
_maybe_add(args, "max_length", max_length)

no_repeat_ngram_size = 2
_maybe_add(args, "no_repeat_ngram_size", no_repeat_ngram_size)

num_beams = 5
_maybe_add(args, "num_beams", num_beams)

num_return_sequences = 5
_maybe_add(args, "num_return_sequences", num_return_sequences)

temperature = 0.7
_maybe_add(args, "temperature", temperature)

top_k = 0  # 0 deactivates top_k sampling; alternative: top_k=50
_maybe_add(args, "top_k", top_k)

# run generation with every option collected above (both the sampling and
# the beam-search options are set)
sample_output = model.generate(**args)

skip_special_tokens = True

# NOTE: num_return_sequences sequences are generated, but only the first
# one (index 0) is decoded and printed here
print("\nOutput:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=skip_special_tokens))


Output:
----------------------------------------------------------------------------------------------------
This is the default prompt.

If you want to change the prompt, you can use the following command:
