In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

prompt = "Today I believe I can fly"
input_ids = tokenizer(prompt, return_tensors="pt")
input_ids = {k: v.to(device) for k, v in input_ids.items()}

# Greedy search
outputs = model.generate(**input_ids, max_length=30)
greedy_out = tokenizer.batch_decode(outputs, skip_special_tokens=False)

# Beam search
outputs = model.generate(**input_ids, max_length=30, num_beams=5, early_stopping=True)
beam_out = tokenizer.batch_decode(outputs, skip_special_tokens=False)

# Top k sampling
outputs = model.generate(**input_ids, max_length=30, do_sample=True, top_k=50)
top_k_out = tokenizer.batch_decode(outputs, skip_special_tokens=False)

# Top p sampling
outputs = model.generate(**input_ids, max_length=30, do_sample=True, top_p=0.92)
top_p_out = tokenizer.batch_decode(outputs, skip_special_tokens=False)

Using cuda device


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [13]:
top_p_out

['Today I believe I can fly. I can fly. I can fly. I can fly. I can fly. I can fly. I can fly']

In [None]:
input_ids = tokenizer(prompt, return_tensors="pt")
input_ids


tensor([[8888,  314, 1975,  314,  460, 6129]])