In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [6]:
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device: {0}'.format(device))

Using device: mps


In [7]:
model_name = "allenai/OLMo-2-0425-1B-Instruct"

#model = AutoModelForCausalLM.from_pretrained(model_name,
#                                             low_cpu_mem_usage=True,
#                                             dtype=torch.bfloat16,
#                                             device_map="auto")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto")
model.eval()

Olmo2ForCausalLM(
  (model): Olmo2Model(
    (embed_tokens): Embedding(100352, 2048, padding_idx=100277)
    (layers): ModuleList(
      (0-15): 16 x Olmo2DecoderLayer(
        (self_attn): Olmo2Attention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (q_norm): Olmo2RMSNorm((2048,), eps=1e-06)
          (k_norm): Olmo2RMSNorm((2048,), eps=1e-06)
        )
        (mlp): Olmo2MLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (post_attention_layernorm): Olmo2RMSNorm((2048,), eps=1e-06)
        

In [8]:
input_text = "Write a short poem about Eleazar Wheelock, founder of Dartmouth College."
max_new_tokens = 128
msg = [{"role":"user","content":input_text}]
input_ids = tokenizer.apply_chat_template(msg, 
                                          return_tensors = "pt",
                                          add_generation_prompt = False)

output = model.generate(input_ids.to(device), 
                        do_sample=True, 
                        max_new_tokens = max_new_tokens,
                        temperature = 1.0, 
                        top_p = 0.95)

print(tokenizer.decode(output[0], 
                       skip_special_tokens=False))

<|endoftext|><|user|>
Write a short poem about Eleazar Wheelock, founder of Dartmouth College.
by Robert Frost

Eleazar Wheelock
You founded Dartmouth College,
You also taught a bit,
And thought a bit,
Praised God a bit,
And wrote some notes on the Greek language
Ponderous and quite precise
For your young American pupils,
And with a flair for the dramatic
Had grand dreams for the college
And put those dreams into being
A place for youth not just for study

Questions are everywhere in the halls
Of every great college today,
But the first Dartmouth ever stands tall,
And Eleazarâ€™s visions, they continue to be.<|endoftext|>
