# Inference Workflow

### Import Packages

In [1]:
from gpt2 import GPT2, GPT2Config
import tiktoken
import torch
from torch.nn import functional as F

### Set Device and Args

In [2]:
# Generation settings used by the sampling cells below.
num_return_sequences = 5  # number of continuations sampled from the same prompt
max_length = 30  # total tokens per sequence (prompt included) at which generation stops

# Pick the compute device: CUDA first, then Apple MPS, otherwise fall back to the CPU.
# The hasattr guard keeps this working on torch builds that have no `mps` backend.
if torch.cuda.is_available():
    device = 'cuda'
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f'Using Device: {device}')

Using Device: mps


### Initialize Model

In [3]:
# Build the model from the pretrained 'gpt2' weights (trained weights from HuggingFace,
# loaded into the custom model framework) and switch it to evaluation mode.
model = GPT2.from_pretrained('gpt2').eval()
# Alternative: start from untrained weights instead (remember to call .eval() as well).
# NOTE(review): this passes the GPT2Config class itself, not an instance - confirm
# whether GPT2 expects GPT2Config() here.
# model = GPT2(GPT2Config)

# Move the parameters to the selected device. This is kept as the final expression so
# the cell displays the module structure.
model.to(device)

  from .autonotebook import tqdm as notebook_tqdm


loading weights from pretrained gpt: gpt2


GPT2(
  (transformer): ModuleDict(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (h): ModuleList(
      (0-11): 12 x Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=768, out_features=2304, bias=True)
          (c_proj): Linear(in_features=768, out_features=768, bias=True)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (gelu): GELU(approximate='tanh')
          (c_proj): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

### Inference

In [4]:
# Tokenize the prompt: turn the text into a list of integer token ids using
# tiktoken's 'gpt2' encoding (no embedding lookup happens here).
prompt = "Hello, I'm a language model,"
enc = tiktoken.get_encoding('gpt2')
tokens = enc.encode(prompt)
tokens


[15496, 11, 314, 1101, 257, 3303, 2746, 11]

In [5]:
# Turn the prompt token ids into a batch with one identical row per sequence to sample.
# The tensor is built under its own name instead of rebinding `tokens`: rebinding made
# the cell consume its own output on a second run, where the extra leading dimension
# caused `repeat` to raise. Leaving `tokens` untouched keeps the cell safe to re-run.
prompt_ids = torch.tensor(tokens, dtype=torch.long) # (T,)
# Add a batch dimension, copy the row num_return_sequences times, then move to the device.
x = prompt_ids.unsqueeze(0).repeat(num_return_sequences, 1).to(device) # (5,8)
x


tensor([[15496,    11,   314,  1101,   257,  3303,  2746,    11],
        [15496,    11,   314,  1101,   257,  3303,  2746,    11],
        [15496,    11,   314,  1101,   257,  3303,  2746,    11],
        [15496,    11,   314,  1101,   257,  3303,  2746,    11],
        [15496,    11,   314,  1101,   257,  3303,  2746,    11]],
       device='mps:0')

In [6]:
# Generate! Right now x is (B, T) where B=5, and T=8. Each iteration samples one new
# token per row and appends it, until every row is max_length tokens long.

# Seed the RNGs (42) so the sampled continuations are reproducible across runs.
# torch.cuda.manual_seed is documented as safe to call when CUDA is unavailable.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Number of highest-probability tokens kept as sampling candidates at each step.
top_k = 50

# Inference only: no gradients are needed, so the whole loop runs under no_grad.
with torch.no_grad():
    while x.size(1) < max_length:
        logits, loss = model(x) # (B, T, vocab_size); loss is not used during generation
        # Take the logits at the last position
        logits = logits[:, -1, :] # (B, vocab_size)
        # Get the probabilities
        probs = F.softmax(logits, dim=-1)
        # Keep only the top-k candidates. k is clamped to the vocabulary size because
        # torch.topk raises if asked for more entries than the dimension holds.
        topk_probs, topk_indices = torch.topk(probs, min(top_k, probs.size(-1)), dim=-1)
        # Sample a position within the top-k list; multinomial treats each row as
        # weights, so the truncated probabilities do not need renormalizing.
        ix = torch.multinomial(topk_probs, 1) # (B, 1)
        # Map the sampled positions back to vocabulary token ids
        xcol = torch.gather(topk_indices, -1, ix) # (B, 1)
        # Append to the sequence
        x = torch.cat((x, xcol), dim=1)

# Decode and print every generated row. A dedicated loop variable is used so the
# notebook-level `tokens` (the prompt ids) is not overwritten by the last sample.
for i in range(num_return_sequences):
    sample_ids = x[i, :max_length].tolist()
    decoded = enc.decode(sample_ids)
    print(">", decoded)

> Hello, I'm a language model, which means I'm familiar with it, but I'm not fluent in that. Well, with that said,
> Hello, I'm a language model, and the syntax, to make use of it, is pretty good. So why do you have that and not
> Hello, I'm a language model, I'm doing this work in Python, and then I'm writing code for Haskell.

So we can
> Hello, I'm a language model, and you're making assumptions about my use of them. I'm not a natural language learner. I'm
> Hello, I'm a language model, well, I'm from Java and have to write a programming language for it. I have my own vocabulary because
