In [1]:
import torch
from transformers import AutoTokenizer, GPT2LMHeadModel

In [2]:
# Load the pretrained GPT-2 LM-head model and its tokenizer from one checkpoint.
# The tokenizer pads on the left, so in a batch the padding precedes each
# shorter prompt rather than following it.
checkpoint = "gpt2"
model = GPT2LMHeadModel.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="left")

In [3]:
# Display the tokenizer's end-of-sequence token string.
tokenizer.eos_token

'<|endoftext|>'

In [4]:
# Reuse EOS as the pad token so prompts of different lengths can be batched.
tokenizer.pad_token = tokenizer.eos_token
inputs = tokenizer(["Hello, my dog is cute", "Hello"], return_tensors="pt", padding=True)

# Padding positions must not contribute to the language-modeling loss.
# Because pad == EOS here, the mask is derived from attention_mask rather than
# from the token id, and masked positions are set to -100 (the label value the
# model's cross-entropy loss ignores). masked_fill returns a new tensor, so
# inputs["input_ids"] itself is left untouched.
labels = inputs["input_ids"].masked_fill(inputs["attention_mask"] == 0, -100)

outputs = model(**inputs, labels=labels)
loss = outputs.loss
logits = outputs.logits

In [5]:
# Display the tokenized batch (input_ids and attention_mask).
inputs

{'input_ids': tensor([[15496,    11,   616,  3290,   318, 13779],
        [50256, 50256, 50256, 50256, 50256, 15496]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 1]])}

In [6]:
# Display the shape of the logits tensor returned by the forward pass.
logits.shape

torch.Size([2, 6, 50257])

In [7]:
# List the fields available on the model output object.
outputs.keys()

odict_keys(['loss', 'logits', 'past_key_values'])

In [8]:
# List the fields produced by the tokenizer.
inputs.keys()

dict_keys(['input_ids', 'attention_mask'])

In [20]:
# Beam-search generation for both prompts: 5 beams, at most 256 new tokens,
# no repeated 2-grams, early stopping enabled. EOS doubles as the pad token.
beam_search_kwargs = dict(
    num_beams=5,
    max_new_tokens=256,
    no_repeat_ngram_size=2,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id,
)
infer = model.generate(**inputs, **beam_search_kwargs)

In [21]:
# Display the generated token-id tensor.
infer

tensor([[15496,    11,   616,  3290,   318, 13779,    11,   475,   314,  1101,
           407,  1654,   611,   673,  7832,   340,   393,   407,    13,   314,
           836,   470,   760,   644,   284,   466,   351,   607,    11,   523,
           314,  1183,   655,  2666,   340,   379,   326,   526,   198,   198,
             1,    40,  1101,  7926,    11,   314,  1422,   470,  1612,   284,
           307, 22066,   553,   673,   531,    13,   366,    40,   655,  2227,
           284,  1309,   345,   760,   326,   314,  1842,   345,    11,   290,
           314,   765,   345,   284,   760,   703,   881,   314,  2051,   345,
            13,   921,   821,   884,   257,  7932,  1048,    13,  6952,   345,
           523,   881,   329,  2279,   345,  1053,  1760,   329,   502,   290,
           616,  1641,    13,   632,   338,   587,   257,   890,   640,  1201,
           314,  1053,   587,  1498,   284,   910, 24829,   284,   345,   526,
         50256],
        [50256, 50256, 50256, 50256

In [22]:
# Decode the generated sequence for the first prompt, keeping special tokens
# visible in the text.
first_beam_text = tokenizer.decode(infer[0], skip_special_tokens=False)
print(first_beam_text)

Hello, my dog is cute, but I'm not sure if she likes it or not. I don't know what to do with her, so I'll just leave it at that."

"I'm sorry, I didn't mean to be rude," she said. "I just wanted to let you know that I love you, and I want you to know how much I miss you. You're such a wonderful person. Thank you so much for everything you've done for me and my family. It's been a long time since I've been able to say goodbye to you."<|endoftext|>


In [23]:
# Decode the generated sequence for the second prompt, keeping special tokens
# (including the left padding) visible in the text.
second_beam_text = tokenizer.decode(infer[1], skip_special_tokens=False)
print(second_beam_text)

<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>Hello

I've been working on this for a while now, and I'm really excited to share it with you. It's been a long time coming, but it's finally here. I hope you enjoy it as much as I do, because I know you're going to love it too.


If you have any questions or comments, feel free to leave them in the comments below, or send me an e-mail.<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>


In [24]:
# Same beam-search settings as before, but request 5 returned sequences per
# prompt via num_return_sequences.
multi_beam_kwargs = dict(
    num_beams=5,
    num_return_sequences=5,
    max_new_tokens=256,
    no_repeat_ngram_size=2,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id,
)
infer2 = model.generate(**inputs, **multi_beam_kwargs)

In [28]:
# Display the shape of the multi-sequence generation result.
infer2.size()

torch.Size([10, 151])

In [30]:
# Decode every returned sequence. Iterating over the rows of the result avoids
# hard-coding the row count (number of prompts * num_return_sequences), which
# would silently go stale if either value changed.
for sequence in infer2:
    print(tokenizer.decode(sequence, skip_special_tokens=False))

Hello, my dog is cute, but I'm not sure if she likes it or not. I don't know what to do with her, so I'll just leave it at that."

"I'm sorry, I didn't mean to be rude," she said. "I just wanted to let you know that I love you, and I want you to know how much I miss you. You're such a wonderful person. Thank you so much for everything you've done for me and my family. It's been a long time since I've been able to say goodbye to you."<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>
Hello, my dog is cute, but I'm not sure if she likes it or not. I don't know what to do with her, so I'll just leave it at that."

"I'm sorry, I didn't mean to

In [38]:
# Seed torch's global RNG so the sampling-based generate calls that follow are
# repeatable when the cells are run in order. torch is already imported in the
# notebook's first cell, so it is not re-imported here.
torch.manual_seed(0)

<torch._C.Generator at 0x7f9595975930>

In [39]:
# Sampling-based generation (do_sample=True) with top_k=0 and max_length=50.
sampling_kwargs = {
    "do_sample": True,
    "top_k": 0,
    "max_length": 50,
    "pad_token_id": tokenizer.eos_token_id,
}
sample_output = model.generate(**inputs, **sampling_kwargs)

In [40]:
# Print a header with a 100-character rule, then the decoded sample for the
# first prompt (special tokens kept).
separator = "-" * 100
print("Output:\n" + separator)
first_sample_text = tokenizer.decode(sample_output[0], skip_special_tokens=False)
print(first_sample_text)

Output:
----------------------------------------------------------------------------------------------------
Hello, my dog is cute along with our families, but the heart is mine now... I can't sit here anymore with my head so tiny. I don't want him to see me like we did today and just not see any of our friends


In [41]:
# Same sampling setup as the previous run, with temperature=0.7 added.
sampling_kwargs = {
    "do_sample": True,
    "temperature": 0.7,
    "top_k": 0,
    "max_length": 50,
    "pad_token_id": tokenizer.eos_token_id,
}
sample_output = model.generate(**inputs, **sampling_kwargs)

In [42]:
# Print a header with a 100-character rule, then the decoded sample for the
# first prompt (special tokens kept).
separator = "-" * 100
print("Output:\n" + separator)
first_sample_text = tokenizer.decode(sample_output[0], skip_special_tokens=False)
print(first_sample_text)

Output:
----------------------------------------------------------------------------------------------------
Hello, my dog is cute. He has the same personality as his owners. I feel the same way about him.

And yet, he still care about me.

I am not sure if I can take it anymore. Do I


In [43]:
# Sampling-based generation with top_k=6 and max_length=50.
sampling_kwargs = {
    "do_sample": True,
    "top_k": 6,
    "max_length": 50,
    "pad_token_id": tokenizer.eos_token_id,
}
sample_output = model.generate(**inputs, **sampling_kwargs)

In [44]:
# Decode every sampled sequence. Iterating over the rows of the result avoids
# hard-coding the number of prompts in the batch.
for sequence in sample_output:
    print(tokenizer.decode(sequence, skip_special_tokens=False))

Hello, my dog is cute. She has no problem with my dogs and I don't have any issues with her being in my home. It is not a big deal. I am happy that we have a good relationship but I don't want to
<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>Hello

I have a few questions, I am sorry to have to answer, but I am sure you have all been asking me about this, I am sure I have all been asking you too. I am very happy


In [45]:
# Sampling-based generation with top_p=0.92, top_k=0 and max_length=50.
sampling_kwargs = {
    "do_sample": True,
    "top_p": 0.92,
    "top_k": 0,
    "max_length": 50,
    "pad_token_id": tokenizer.eos_token_id,
}
sample_output = model.generate(**inputs, **sampling_kwargs)

In [46]:
# Decode every sampled sequence. Iterating over the rows of the result avoids
# hard-coding the number of prompts in the batch.
for sequence in sample_output:
    print(tokenizer.decode(sequence, skip_special_tokens=False))

Hello, my dog is cute! I love she was wagging her tail when I ran across her and it took quite a bit for me to get her over there. All of a sudden I noticed her eyes were open so I snagged her to
<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>Hello, there's a lot of people who don't have any jobs. The military's been working for years, developing weapons for warlords. Everybody thinks we are robots because we are not. All that's happening is this


In [48]:
# Combined top-k / top-p sampling, returning 3 sampled sequences per prompt.
# `early_stopping` has been dropped: it only controls beam-based decoding, and
# this call samples without beams, so the flag did not influence the result
# (recent Transformers versions warn about the unused flag).
sample_output = model.generate(
    **inputs,
    do_sample=True,
    max_length=50,
    top_p=0.95,
    top_k=50,
    pad_token_id=tokenizer.eos_token_id,
    num_return_sequences=3,
)

In [49]:
# Decode every sampled sequence. Iterating over the rows of the result avoids
# hard-coding the row count (number of prompts * num_return_sequences), which
# would silently go stale if either value changed.
for sequence in sample_output:
    print(tokenizer.decode(sequence, skip_special_tokens=False))

Hello, my dog is cute.

He's not being very cute.

That's not cute.

That's what I was trying to tell you.

Hey, it's okay.

He won't stop staring
Hello, my dog is cute! I'd be lying if I said he was happy!" said Kimbo, laughing.

In the morning, Kimbo is enjoying playing inside the bathroom and eating a pie of bread while his girlfriend, Kayla
Hello, my dog is cute. I'm sure it is adorable at first, but I have no idea why so many people have gotten into this situation before I did. It would be really wrong to think that it's a problem that some people have
<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>Hello for the fact that you've been reading this all day. Let's break it down, in its entirety:

What's your favorite part about playing Starcraft 2?

Favorite bit about watching it?


<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>Hello was doing my best to do so. I should be able to explain what a lot of things are, what I'm going to do next. But th