In [1]:
# verify environment

import sys
sys.executable

'c:\\Users\\Emily\\miniconda3\\envs\\llm_steer\\python.exe'

In [1]:
# load model

import os
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN", None)
if hf_token:
    login(token=hf_token)

model_id = "meta-llama/Llama-3.2-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Emily\.cache\huggingface\token
Login successful


Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.01s/it]


In [7]:
# inspect model config

print(model.config)

LlamaConfig {
  "_name_or_path": "meta-llama/Llama-3.2-3B-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.2",
  "use_cache": true,
  "vocab_size": 128256
}



In [7]:
# test pipeline

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Sydney is the capital of Australia. True or False?"},
]

outputs = pipe(messages, max_new_tokens = 256)
print("Model states: ", outputs[0]["generated_text"])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Model states:  [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Sydney is the capital of Australia. True or False?'}, {'role': 'assistant', 'content': 'False. The capital of Australia is actually Canberra, not Sydney. Sydney is the largest city in Australia and is located in the state of New South Wales, but it is not the capital.'}]


In [8]:
# test hidden state access

model_inputs = tokenizer(["Tell me about yourself."], return_tensors="pt", truncation=True).to(model.device)
with torch.no_grad():
    hidden_states = model(model_inputs["input_ids"], output_hidden_states=True).hidden_states
    print(hidden_states)
    print(f"length: {len(hidden_states)}")

(tensor([[[-1.1587e-04,  3.8528e-04, -1.9379e-03,  ...,  2.3937e-04,
          -5.4550e-04,  8.8215e-05],
         [-3.8574e-02,  2.9945e-04,  9.7656e-03,  ...,  1.1719e-02,
           2.4292e-02, -4.3457e-02],
         [ 3.8574e-02, -1.0193e-02,  1.6846e-02,  ...,  1.0864e-02,
          -2.5787e-03, -2.0386e-02],
         [ 1.0586e-04, -2.1484e-02,  3.7537e-03,  ...,  8.2397e-03,
           3.0273e-02, -1.0834e-03],
         [-1.7334e-02,  2.7344e-02,  6.0059e-02,  ..., -7.2754e-02,
          -1.0559e-02, -2.3438e-02],
         [ 7.1716e-03, -3.5553e-03,  2.8809e-02,  ...,  1.0864e-02,
          -8.7891e-03,  8.3008e-03]]], device='cuda:0', dtype=torch.bfloat16), tensor([[[-0.0031,  0.0019,  0.0601,  ...,  0.0510,  0.0021, -0.0049],
         [-0.0693,  0.0232,  0.0505,  ...,  0.0280, -0.0149, -0.0427],
         [ 0.0723, -0.0352, -0.0145,  ..., -0.0013, -0.0048, -0.0186],
         [-0.0035,  0.0018, -0.0757,  ..., -0.0034,  0.0454, -0.0112],
         [-0.0305,  0.0317, -0.0107,  ..., 