# Baseline HuggingFace GPT-2 Models

Below are out-of-the-box runs of pre-trained GPT-2 models (GPT-2 and distilGPT-2) prompted to generate a book summary under given genre constraints. We use Hugging Face's `pipeline` API and pre-trained model classes to generate the text.

In [None]:
# import the Hugging Face transformers text-generation pipeline and seeding helper
from transformers import pipeline, set_seed

In [None]:
# Shared input prompt: passed unchanged to both the GPT-2 and the distilGPT-2
# pipeline cells below. (The decoding-methods cell defines its own `prompt`.)
text = "Generate a book summary with genre fiction, science fiction:\n"

In [None]:
# Pre-trained GPT-2 model: build the text-generation pipeline, set the seed
# right before generating, and request three sequences for the shared prompt.
gpt_generator = pipeline('text-generation', model='gpt2')
set_seed(42)
gpt_generated_text = gpt_generator(
    text,
    max_length=50,
    num_return_sequences=3,
)

# Show each returned sequence under its own banner line.
for sample in gpt_generated_text:
    print("--------- New Generated Text----------- \n", sample['generated_text'], sep="\n")

In [None]:
# Pre-trained distilGPT-2 model: same prompt, seed and generation arguments as
# the GPT-2 cell, only the checkpoint differs.
distil_generator = pipeline('text-generation', model='distilgpt2')
set_seed(42)
distil_generated_text = distil_generator(
    text,
    max_length=50,
    num_return_sequences=3,
)

# Show each returned sequence under its own banner line.
for sample in distil_generated_text:
    print("--------- New Generated Text----------- \n", sample['generated_text'], sep="\n")

In [None]:
# Exploring different decoding methods with the pre-trained model.
# This cell calls `model.generate` directly (rather than going through
# `pipeline`) so the decoding arguments can be spelled out explicitly.
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

prompt = "Generate a story summary with genre science fiction, mystery:\n"
checkpoint = "gpt2"

# Tokenize the prompt into PyTorch tensors ("pt") for the model.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
inputs = tokenizer(prompt, return_tensors="pt")

model = AutoModelForCausalLM.from_pretrained(checkpoint)

# do_sample=True makes generation stochastic; seed it the same way as the
# pipeline cells (set_seed(42)) so re-running this cell is repeatable.
set_seed(42)
outputs = model.generate(
    **inputs,
    max_length=150,            # upper bound on prompt + generated tokens
    num_beams=2,               # keep two beam hypotheses
    no_repeat_ngram_size=2,    # never repeat the same 2-gram
    do_sample=True,            # sample instead of picking the arg-max token
    early_stopping=True,       # stop once enough finished beams exist
    # GPT-2 defines no pad token; pass EOS explicitly instead of relying on
    # generate()'s implicit fallback (which also emits a warning each run).
    pad_token_id=tokenizer.eos_token_id,
)

# Decode the generated token ids back to text, dropping special tokens.
output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
print(output)