In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub checkpoint to load. Swap in the commented alternative
# below to run the rest of the notebook against a different model.
# model_name = "EleutherAI/pythia-1b"
model_name = "meta-llama/Llama-3.2-1B"

# The tokenizer and the model must come from the same checkpoint so that
# token ids line up with the model's vocabulary.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Tokenize a single prompt into PyTorch tensors.
prompt = "The capital of France is"
inputs = tokenizer(prompt, return_tensors="pt")

# Forward pass only: no gradients are needed, so run in inference mode.
with torch.inference_mode():
    outputs = model(**inputs)

# Raw (pre-softmax) scores produced by the model for this prompt.
logits = outputs.logits

In [12]:
# Display the dimensions of the logits tensor from the single-prompt run.
logits.size()

torch.Size([1, 5, 50304])

In [14]:
# Two routes to the same greedy choice: the id of the highest-scoring
# vocabulary entry along the last axis, computed once with argmax and once
# via the indices returned by max.
greedy_tokens = logits.argmax(dim=-1)
greedy_tokens_v2 = logits.max(dim=-1).indices

# Print both so they can be compared by eye.
print(greedy_tokens, greedy_tokens_v2, sep="\n")

tensor([[1246,  273,  253,   13, 7785]])
tensor([[1246,  273,  253,   13, 7785]])


In [18]:
# Side-by-side view of each prompt token and the model's greedy pick at the
# same position.
# NOTE(review): a causal LM's output at position i is its guess for the token
# at position i + 1, so each row pairs an input token with the predicted
# *following* token (e.g. " is" -> " Paris"), not with a prediction of that
# same token. The "Ground truth" header is therefore loose wording.
print("Ground truth\tPrediction")
prompt_ids = inputs.input_ids[0].tolist()
greedy_ids = greedy_tokens[0].tolist()
for prompt_id, greedy_id in zip(prompt_ids, greedy_ids):
    prompt_piece = tokenizer.decode([prompt_id])
    greedy_piece = tokenizer.decode([greedy_id])
    print(prompt_piece, "\t\t", greedy_piece)

Ground truth	Prediction
The 		  present
 capital 		  of
 of 		  the
 France 		 ,
 is 		  Paris


In [2]:
# Run several prompts through the model as one batch.
multi_prompt = ["The capital of France is", "The capital of Italy is", "The capital of Spain is"]

# return_tensors="pt" stacks the prompts into one rectangular tensor, which
# fails as soon as two prompts tokenize to different lengths. Enable padding
# so the cell keeps working when the prompts are edited.
# Some checkpoints ship without a pad token; fall back to EOS in that case.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Pad on the left so that index -1 is always each prompt's last real token,
# which is the position the next-token logits are read from further down.
tokenizer.padding_side = "left"
inputs = tokenizer(multi_prompt, return_tensors="pt", padding=True)

# Forward pass only; the attention mask in `inputs` tells the model to
# ignore any padding positions.
with torch.inference_mode():
    outputs = model(**inputs)
    logits = outputs.logits

In [3]:
# Display the dimensions of the logits tensor from the batched run.
logits.size()

torch.Size([3, 6, 128256])

In [4]:
# Keep only the scores at the final sequence position of every prompt,
# then normalise them into a probability distribution over the vocabulary.
last_token_logits = logits[:, -1]
next_token_probs = last_token_logits.softmax(dim=-1)

# Display the resulting dimensions.
next_token_probs.size()

torch.Size([3, 128256])

In [5]:
# Greedy decoding step: take the most probable token id for each prompt
# and turn each id back into text.
predicted_token_ids = next_token_probs.argmax(dim=-1)
predicted_text = tokenizer.batch_decode(
    predicted_token_ids,
    skip_special_tokens=True,
)
print(predicted_text)

[' Paris', ' Rome', ' Madrid']
