# Steering examples on big models

In [1]:
import torch as t
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer

import ave

In [2]:
# Attempt to load llama-13b.
# Strategy: build a weightless skeleton of the model on the meta device, then
# let accelerate read the checkpoint and place the weights directly on `device`.
model_path = '../models/llama-13B'
device = 'mps'  # target device for the weights; reused by later cells

# Only the config is needed to build the skeleton. Calling from_pretrained()
# inside init_empty_weights() would also read the checkpoint, which
# load_checkpoint_and_dispatch does again below.
config = LlamaConfig.from_pretrained(model_path)
with init_empty_weights():
    model = LlamaForCausalLM(config)
    model.tie_weights()  # in case checkpoint doesn't contain duplicate keys for tied weights

model = load_checkpoint_and_dispatch(
    model,
    model_path,
    device_map={'': device},  # '' maps the whole model onto a single device
    dtype=t.float32,
    no_split_module_classes=["LlamaDecoderLayer"],
)
# A model built from a config starts in training mode; from_pretrained() used to
# switch it to eval mode implicitly, so do that explicitly here.
model.eval()
tokenizer = LlamaTokenizer.from_pretrained(model_path)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# Generate a sequence from the model
# Sampling is stochastic: seed the RNG so a re-run on the same setup yields the same text.
t.manual_seed(0)

# Few-shot translation prompt: one worked example, then the line to complete.
prompt = "J'aime le chocolat = I like chocolate\n祝你一天过得愉快 ="

tokens = model.generate(
    # Put the inputs on the same device the model was dispatched to (`device`
    # from the loading cell) rather than repeating the literal here.
    **ave.tokenize(tokenizer, [prompt], device=device),
    max_new_tokens=100, temperature=0.8, top_p=0.95, do_sample=True,
)
# hyperparameters from https://github.com/facebookresearch/llama/blob/main/example.py
print(tokenizer.decode(tokens[0].tolist(), skip_special_tokens=True))

J'aime le chocolat = I like chocolate
祝你一天过得愉快 = You look so beautiful today
The word 着 (zhu) has two meanings: 1) to wear or 2) to have.
You can tell the meaning of the word 着 by looking at the context. The sentence J'aime le chocolat is about wearing. But the sentence 祮 you天过得愉快 is about having.
There are two ways to form the present tense
