In [1]:
# !pip install -r requirements.txt
import matplotlib.pyplot as plt
import numpy as np
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Section 1: Load the model

# Location passed to both from_pretrained() calls. The leading "./" suggests a
# local checkpoint directory rather than a Hub model id -- NOTE(review): confirm
# the directory exists relative to where the notebook is started.
model_name = "./models/gpt2"

# Tokenizer matching the checkpoint; used below to encode the prompt and to
# decode predicted token ids back into text.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Causal language model (with LM head) loaded from the same location.
model = AutoModelForCausalLM.from_pretrained(model_name)


In [3]:
# Print the module tree to inspect the architecture: embeddings, the stacked
# transformer blocks, and the final projection onto the vocabulary.
print(model)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)


In [4]:
# Section 2: How to generate the next token from the model's output logits


In [5]:
# Step 1: turn the prompt text into model inputs.
# return_tensors="pt" asks the tokenizer for PyTorch tensors; the result holds
# "input_ids" and "attention_mask", each with one row for the single prompt.
prompt = "The strong ML engineer can do"
inputs = tokenizer(prompt, return_tensors="pt")
inputs

{'input_ids': tensor([[  464,  1913, 10373, 11949,   460,   466]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}

In [8]:
# Run a forward pass on the tokenized prompt and retrieve the logits.
# torch.no_grad() turns off gradient tracking, which inference does not need.
with torch.no_grad():
    outputs = model(**inputs)

# The logits hold one score per vocabulary entry for every input position;
# for this 6-token prompt the printed shape is [1, 6, 50257].
logits = outputs.logits 
print(logits.shape)

torch.Size([1, 6, 50257])


In [9]:
# Scores for the token that would follow the prompt: first (only) sequence in
# the batch, last position, all vocabulary entries.
last_logits = logits[0, -1]

# Greedy choice: id of the highest-scoring vocabulary entry (a 0-d tensor).
next_token_id = torch.argmax(last_logits)
next_token_id

tensor(340)

In [10]:
# Decode the most likely next-token id back into its text form
tokenizer.decode(next_token_id)

' it'

In [11]:
# The 10 highest-scoring candidates for the *next* token (alternatives for one
# position, not ten consecutive words), decoded individually to text.
top_k = last_logits.topk(k=10)
tokens = [tokenizer.decode(candidate_id) for candidate_id in top_k.indices]
tokens

[' it',
 ' a',
 ' this',
 ' the',
 ' anything',
 ' everything',
 ' all',
 ' things',
 ' what',
 ' much']

In [14]:
# Build the model inputs for the next decoding step by appending the token that
# was just selected to the running sequence.
next_inputs = {
    # next_token_id is a 0-d tensor, so it is reshaped to (1, 1) before being
    # concatenated along the sequence dimension: (1, n) ++ (1, 1) -> (1, n + 1).
    "input_ids": torch.cat(
        [inputs["input_ids"], next_token_id.reshape((1, 1))],
        dim=1,
    ),
    # The appended position is a real token, so its mask value is 1.
    # new_ones() inherits dtype and device from the existing mask, so the
    # concatenation also works when the inputs live on a GPU (a literal
    # torch.tensor([[1]]) is always created on the CPU).
    "attention_mask": torch.cat(
        [inputs["attention_mask"], inputs["attention_mask"].new_ones((1, 1))],
        dim=1,
    ),
}

In [15]:
# Sanity check: show each extended tensor next to its shape; both should be one
# position longer than the original prompt encoding.
for key in ("input_ids", "attention_mask"):
    print(next_inputs[key], next_inputs[key].shape)

tensor([[  464,  1913, 10373, 11949,   460,   466,   340]]) torch.Size([1, 7])
tensor([[1, 1, 1, 1, 1, 1, 1]]) torch.Size([1, 7])
