In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [2]:
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device: {0}'.format(device))

Using device: mps


In [3]:
model_name = "allenai/OLMo-2-0425-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto")
model.eval()

Olmo2ForCausalLM(
  (model): Olmo2Model(
    (embed_tokens): Embedding(100352, 2048, padding_idx=100277)
    (layers): ModuleList(
      (0-15): 16 x Olmo2DecoderLayer(
        (self_attn): Olmo2Attention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (q_norm): Olmo2RMSNorm((2048,), eps=1e-06)
          (k_norm): Olmo2RMSNorm((2048,), eps=1e-06)
        )
        (mlp): Olmo2MLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (post_attention_layernorm): Olmo2RMSNorm((2048,), eps=1e-06)
        

In [10]:
input_text = "Write a short poem about Eleazar Wheelock, founder of Dartmouth College."
max_new_tokens = 128
msg = [{"role":"user","content":input_text}]
input_ids = tokenizer.apply_chat_template(msg, 
                                          return_tensors = "pt",
                                          add_generation_prompt = False)

output = model.generate(input_ids.to(device), 
                        do_sample=True, 
                        max_new_tokens = max_new_tokens,
                        temperature = 1.0, 
                        top_p = 0.95)

print(tokenizer.decode(output[0], 
                       skip_special_tokens=False))

<|endoftext|><|user|>
Write a short poem about Eleazar Wheelock, founder of Dartmouth College.
When he founded Dartmouth in 1779, he wasn't sure what he was doing.
But he felt the call of the sea and left his family behind.
He and a group of fellow adventurers traveled 400 miles through wilderness.
After crossing the barrier of a mountain, they founded their college in Hanover, New Hampshire.
Eleazar Wheelock was a visionary and foresaw Dartmouth as an institution that would lead.
Eleazar Wheelock passed away in 1813, but the legacy of his vision lives on today in Dartmouth.

<|endoftext|>
