In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_id="openai-community/gpt2"
torch_dtype = torch.float32

In [3]:
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype).to("mps")

In [4]:
print(model)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)


In [5]:
print(model.get_input_embeddings())

Embedding(50257, 768)


In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_id)



In [7]:
input_sequence = tokenizer.encode(["Hello, world", "Every step moves you forward"])
print(input_sequence)

[15496, 11, 995, 6109, 2239, 6100, 345, 2651]


In [8]:
input_sequence_1 = torch.tensor(tokenizer.encode("Every effort moves you"))
print(input_sequence_1)
input_sequence_2 = torch.tensor(tokenizer.encode("Every day holds a"))
print(input_sequence_2)
input_sequence = torch.stack((input_sequence_1, input_sequence_2))
print(input_sequence)

tensor([6109, 3626, 6100,  345])
tensor([6109, 1110, 6622,  257])
tensor([[6109, 3626, 6100,  345],
        [6109, 1110, 6622,  257]])


In [9]:
input_sequence = input_sequence.to("mps")

In [10]:
generation_config = GenerationConfig(output_logits=True, return_dict_in_generate=True)
model_output = model.generate(input_sequence, generation_config=generation_config)
print(model_output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


GenerateDecoderOnlyOutput(sequences=tensor([[ 6109,  3626,  6100,   345,  2651,    13,   198,   198,   464,   717,
          2239,   318,   284,  1833,   262,  6817,   286,   534,   670,    13],
        [ 6109,  1110,  6622,   257,  2041,  1295,   287,   616,  2612,    13,
           314,  1101,   523, 14066,   284,   423,   587,  1498,   284,   307]],
       device='mps:0'), scores=None, logits=(tensor([[-136.6002, -137.3804, -146.5556,  ..., -148.2978, -147.2155,
         -139.5677],
        [-117.7201, -115.2301, -121.5060,  ..., -124.8349, -119.6037,
         -116.2177]], device='mps:0'), tensor([[-103.5413, -103.4801, -113.8425,  ..., -118.4444, -117.4602,
         -107.9704],
        [ -99.0750,  -96.7911, -103.6109,  ..., -106.3692, -105.6009,
         -100.0326]], device='mps:0'), tensor([[-147.1365, -145.4639, -148.1290,  ..., -159.0972, -159.8802,
         -140.3477],
        [ -61.5315,  -60.6241,  -69.6247,  ...,  -76.1035,  -70.5588,
          -66.0821]], device='mps:0'), 

In [11]:
len(model_output['logits'])

16

In [12]:
for sequence in model_output['sequences']:
    print(sequence)
    print(tokenizer.decode(sequence))

tensor([6109, 3626, 6100,  345, 2651,   13,  198,  198,  464,  717, 2239,  318,
         284, 1833,  262, 6817,  286,  534,  670,   13], device='mps:0')
Every effort moves you forward.

The first step is to understand the importance of your work.
tensor([ 6109,  1110,  6622,   257,  2041,  1295,   287,   616,  2612,    13,
          314,  1101,   523, 14066,   284,   423,   587,  1498,   284,   307],
       device='mps:0')
Every day holds a special place in my heart. I'm so grateful to have been able to be


In [13]:
batch_size = 2

for batch in range(batch_size):
    for logits in model_output['logits']:
        probas = torch.nn.functional.softmax(logits[batch], dim=0)
        prediction = torch.argmax(probas)
        print(tokenizer.decode(prediction))

 forward
.




The
 first
 step
 is
 to
 understand
 the
 importance
 of
 your
 work
.
 special
 place
 in
 my
 heart
.
 I
'm
 so
 grateful
 to
 have
 been
 able
 to
 be


In [14]:
probas, indices = torch.topk(torch.nn.functional.softmax(model_output['logits'][0][0], dim=0), 20)
print(probas)
print(indices)
for index in indices:
    print(tokenizer.decode(index))

tensor([0.1702, 0.1030, 0.0557, 0.0499, 0.0482, 0.0482, 0.0369, 0.0333, 0.0324,
        0.0313, 0.0256, 0.0252, 0.0148, 0.0147, 0.0138, 0.0122, 0.0104, 0.0093,
        0.0085, 0.0083], device='mps:0')
tensor([2651,  284, 1863,   13,   11,  832,  422, 3812,  287, 3371, 5699,  290,
         319, 1497,  656, 1088, 6364,  510,  355,  503], device='mps:0')
 forward
 to
 along
.
,
 through
 from
 toward
 in
 towards
 closer
 and
 on
 away
 into
 around
 slowly
 up
 as
 out
