In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint identifier shared by the tokenizer and the model (small demo model).
model_name = "gpt2"

# Load the tokenizer first, then the causal-LM weights, from the same checkpoint
# so that token IDs produced by one are valid inputs for the other.
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForCausalLM.from_pretrained(model_name),
)

print("Loaded tokenizer and model:", model_name)


Loaded tokenizer and model: gpt2


In [8]:
text = "Learning LLMs with confidence."

# Encode the sentence; return_tensors="pt" yields PyTorch tensors with a
# leading batch dimension (here a batch of one).
inputs = tokenizer(text, return_tensors="pt")
token_ids = inputs["input_ids"]

print("Input IDs:", token_ids)
print("Attention mask:", inputs["attention_mask"])

# Map the IDs of the single sequence in the batch back to their token strings.
tokens = tokenizer.convert_ids_to_tokens(token_ids[0])
print("Tokens:", tokens)


Input IDs: tensor([[41730, 27140, 10128,   351,  6628,    13]])
Attention mask: tensor([[1, 1, 1, 1, 1, 1]])
Tokens: ['Learning', 'ĠLL', 'Ms', 'Ġwith', 'Ġconfidence', '.']


In [9]:
import torch

prompt = "Learning LLMs with confidence:"

# Sampling (do_sample=True) draws from torch's RNG; seed it so that
# Restart Kernel -> Run All reproduces the same continuation.
torch.manual_seed(42)

inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(
        **inputs,                 # input_ids + attention_mask
        max_new_tokens=50,        # cap on generated tokens (prompt not counted)
        do_sample=True,           # sample instead of greedy decoding
        top_k=50,                 # restrict sampling to the 50 most likely tokens
        top_p=0.95,               # ...and to the 0.95 cumulative-probability nucleus
        # No pad token was configured, so generate() would fall back to EOS and
        # emit a warning; pass the same value explicitly to keep the output clean.
        pad_token_id=tokenizer.eos_token_id,
    )

# output_ids contains the prompt followed by the continuation; decode row 0.
generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(generated)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Learning LLMs with confidence: A special thanks to:

Robert S. S. Smith

Omaha, OK

Evan J. Caudwell

Norman, OK

John C.

Brown, TX

Gert-


In [13]:
import torch

prompt = "LLM engineering with Python and Next.js."

inputs = tokenizer(prompt, return_tensors="pt")

with torch.no_grad():
    # Ask the model to also return the per-layer hidden states
    outputs = model(**inputs, output_hidden_states=True)

# For AutoModelForCausalLM, hidden states are in outputs.hidden_states
# Take the last layer
last_hidden = outputs.hidden_states[-1]          # [batch, seq_len, hidden_size]

# Mean-pool over tokens -> one vector for the whole sentence.
# Weight by the attention mask so that padded positions (if this cell is ever
# reused on a padded batch) do not dilute the average. For a single unpadded
# prompt the mask is all ones and this equals a plain mean over dim=1.
mask = inputs["attention_mask"].unsqueeze(-1).to(last_hidden.dtype)  # [batch, seq_len, 1]
token_counts = mask.sum(dim=1).clamp(min=1)      # [batch, 1]; guard against divide-by-zero
sentence_embedding = (last_hidden * mask).sum(dim=1) / token_counts  # [batch, hidden_size]

print("Embedding shape:", sentence_embedding.shape)
print("First 10 values:", sentence_embedding[0, :10])


Embedding shape: torch.Size([1, 768])
First 10 values: tensor([ 0.0377, -0.2004, -0.1971, -0.3287,  0.2018,  0.0770,  4.6432,  0.0392,
        -0.2433, -0.1688])
