This notebook shows how to generate text from a prompt and a few sampling hyperparameters, using either minGPT or huggingface/transformers.

In [1]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from mingpt.model import GPT
from mingpt.utils import set_seed
from mingpt.bpe import BPETokenizer
# fix the seed up front so repeated runs of the notebook are comparable
# (set_seed is imported from mingpt.utils; presumably it seeds the RNGs used for sampling — confirm there)
set_seed(3407)

In [2]:
# Notebook-level settings read by the model-loading cell and by generate().
use_mingpt = True # use minGPT or huggingface/transformers model?
# pretrained checkpoint name passed to from_pretrained() for the model and for the tokenizer
model_type = 'gpt2'
# device the model and the encoded prompt are moved to
device = 'cpu'

In [3]:
# load pretrained weights from whichever implementation was selected above
model = (GPT if use_mingpt else GPT2LMHeadModel).from_pretrained(model_type)
if not use_mingpt:
    # huggingface only: point the pad token at EOS to suppress a warning
    model.config.pad_token_id = model.config.eos_token_id

# move the model to the target device and put it in eval mode
# (the trailing `;` keeps the module repr out of the cell output)
model.to(device).eval();

number of parameters: 124.44M


In [4]:

def generate(prompt='', num_samples=10, steps=20, do_sample=True):
    """Print `num_samples` continuations of `prompt` produced by the global `model`.

    Relies on the notebook-level names `model`, `model_type` and `device`.

    Args:
        prompt: text to condition on; an empty string is replaced by
            '<|endoftext|>' so that unconditional samples are produced.
        num_samples: number of rows the encoded prompt is expanded to, i.e. how
            many continuations are generated together in one batch.
        steps: passed to `model.generate` as `max_new_tokens`.
        do_sample: forwarded unchanged to `model.generate`.

    Returns:
        None. Each decoded sequence is printed below an 80-dash separator line.
    """
    tok = GPT2Tokenizer.from_pretrained(model_type)

    # to create unconditional samples, fall back to the end-of-text marker:
    # huggingface/transformers tokenizer special cases this string
    text = '<|endoftext|>' if prompt == '' else prompt
    input_ids = tok(text, return_tensors='pt').to(device)['input_ids']

    # one row per requested sample, so every sample is generated in the same batch
    batch = input_ids.expand(num_samples, -1)

    # run the model forward for `steps` new tokens (top_k is fixed at 40)
    generated = model.generate(batch, max_new_tokens=steps, do_sample=do_sample, top_k=40)

    separator = '-'*80
    for row in range(num_samples):
        decoded = tok.decode(generated[row].cpu().squeeze())
        print(separator)
        print(decoded)
        

In [5]:
# print 10 continuations of the prompt; steps=20 is passed on as max_new_tokens,
# and do_sample keeps its default of True
generate(prompt='Dylan Skinner is the', num_samples=10, steps=20)

--------------------------------------------------------------------------------
Dylan Skinner is the author of "The Biggest Bad Boy in America": A History of Black America at the University of
--------------------------------------------------------------------------------
Dylan Skinner is the President of Kratom Awareness Month, a national coalition of community health advocates and activists who call for more
--------------------------------------------------------------------------------
Dylan Skinner is the author of "Dylan's War: What I Learned as a Boy a Slave" and other books
--------------------------------------------------------------------------------
Dylan Skinner is the co-founder of the Internet Archive, and currently serves as a director at Creative Cloud Group.<|endoftext|>
--------------------------------------------------------------------------------
Dylan Skinner is the CEO of V.U.

He recently shared a series of questions in which he spoke with
-------------------