In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from transformers import GPT2Tokenizer, GPT2Model, pipeline, GPT2LMHeadModel, GenerationConfig

## GPT2

In [13]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
model = GPT2Model.from_pretrained('gpt2')
text = "The Fox is being late for work. The Cat is drinking the coffee. Who is more chill?"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
print(output.last_hidden_state.shape)

torch.Size([1, 20, 768])


In [14]:
generator = pipeline('text-generation', model='gpt2')

Device set to use cpu


In [15]:
s = 'The Fox is being late for work. The Cat is drinking the coffee. Who is more chill, the Fox or the Cat?'
s = 'What is the capital of USA?'
s = 'Given dictionary list values comma separated. Example. Input: {"abc": 1, "def": "2"}. Answer: 1, "2". Input: {"key": "pet", "key2": "xyz", "ArithmeticError": "2+2=4"}. Answer:'
# s = 'Context: The capital of the United States of America is Washington D.C. Question: What is the capital of USA? Answer:'
# s = 'Context 2+2=4, 2*2=4. Question: How much is 2 + 2? Answer:'
generator(s, max_length=100, num_return_sequences=5)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Given dictionary list values comma separated. Example. Input: {"abc": 1, "def": "2"}. Answer: 1, "2". Input: {"key": "pet", "key2": "xyz", "ArithmeticError": "2+2=4"}. Answer: 4. Input: "x yz", "\\t{{name}}=y\\t" Output: {"hello": {"className":"DictionaryList"}. Answer: "{main:'},
 {'generated_text': 'Given dictionary list values comma separated. Example. Input: {"abc": 1, "def": "2"}. Answer: 1, "2". Input: {"key": "pet", "key2": "xyz", "ArithmeticError": "2+2=4"}. Answer: 1, "2". Input: {"key": "xyz", "key2": "math", "arithmeticError": "2+1=8"}. Result: 1, "'},
 {'generated_text': 'Given dictionary list values comma separated. Example. Input: {"abc": 1, "def": "2"}. Answer: 1, "2". Input: {"key": "pet", "key2": "xyz", "ArithmeticError": "2+2=4"}. Answer: 1, "4". Example. Input: {"foo": "3", "bar": 1}. Answer: 3. Example. Input: {"abc": "a", "def":'},
 {'generated_text': 'Given dictionary list values comma separated. Example. Input: {"abc": 1, "def": "2"}. Answer:

In [19]:
model

GPT2Model(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0-11): 12 x GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D(nf=2304, nx=768)
        (c_proj): Conv1D(nf=768, nx=768)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D(nf=3072, nx=768)
        (c_proj): Conv1D(nf=768, nx=3072)
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)

In [19]:
model_lmh = GPT2LMHeadModel.from_pretrained('gpt2')
model_lmh

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [20]:
gen_cfg = GenerationConfig(
    max_new_tokens=20,
    eos_token_id=tokenizer.sep_token_id,
    do_sample=True,
    top_p=0.95,
    top_k=50,
    # temperature=0.6,
)

In [24]:
text = "The Fox is being late for work. The Cat is drinking the coffee. Who is more chill?"
text = 'Who is the president of the United States?'
encoded_input = tokenizer(text, return_tensors='pt')
input_ids = encoded_input.input_ids

In [25]:
attention_mask = torch.ones(input_ids.shape, device=input_ids.device, dtype=bool)
ouput_ids = model_lmh.generate(input_ids=input_ids, attention_mask=attention_mask)
out = tokenizer.decode(ouput_ids.squeeze())
print(out)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Who is the president of the United States?

The president of the United States is the president of the United States.

The president
