In [1]:
# %%capture
# Optional one-time setup: uncomment this cell to install the libraries used below.
# The version specifiers are quoted so the shell does not treat `>` as an output
# redirection, and `%pip` is used so the install targets this kernel's environment.
# %pip install "transformers>=4.40.1" "accelerate>=0.27.2"

- Install `transformers` before downloading the model.
- When creating the model, I accidentally set `trust_remote_code=True`, which led to a further exception when calling the generator.

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the causal language model and its matching tokenizer.
model_name = "microsoft/Phi-3-mini-4k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    torch_dtype="auto",
    # Kept False on purpose: per the notes at the top of this notebook,
    # True led to an exception when generating.
    trust_remote_code=False,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The previous example wrapped the model and tokenizer in a pipeline;
# this time tokenization and generation are done explicitly.
prompt = "Write a small funny joke to tell kindergarten kids about dinosaurs"

# Tokenize the prompt and place the ids on the same device as the model,
# so the inputs follow the model if `device_map` is ever changed.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

print("Tokenized prompt\n", input_ids)

# Decode each id on its own to show which piece of text it maps to.
# The loop variable is `token_id` rather than `id` so the builtin `id()`
# is not shadowed for the rest of the kernel session.
for token_id in input_ids[0]:
    print(tokenizer.decode(token_id))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Tokenized prompt
 tensor([[14350,   263,  2319,  2090,  1460,  2958,   446,   304,  2649,  2924,
         15064,  8109,   413,  4841,  1048,  4538,  3628,  1295]],
       device='cuda:0')
Write
a
small
fun
ny
jo
ke
to
tell
kind
erg
arten
k
ids
about
din
osa
urs


In [2]:
# Let the model continue the tokenized prompt, capped at 50 new tokens.
# The result is a tensor of token ids (prompt ids followed by the generated ids),
# not text; it has to be decoded with the tokenizer to be readable.
generated_output = model.generate(
    input_ids=input_ids,
    max_new_tokens=50,
)
print(generated_output)

tensor([[14350,   263,  2319,  2090,  1460,  2958,   446,   304,  2649,  2924,
         15064,  8109,   413,  4841,  1048,  4538,  3628,  1295, 29889,    13,
            13,  2277, 29937,   673, 29901, 11008,  1258,   278,  4538,  3628,
           332,   748,   304,   278, 11619, 29973,  7311,   372,   471,   714,
           310, 14002, 29991,    13,    13,    13,  4013,  2958,   446,   338,
          2560,   322,  3913,   263,  1708,   373,  3838,   411,   278, 16549,
           376,   449,   310, 14002,  1699,   607,  2794,  1554]],
       device='cuda:0')


In [6]:
# Spot-check a few ids from the model output by decoding them one at a time.
# The trailing comments record the text each id decoded to in the saved run.
sample_token_ids = [
    278,   # the
    2794,  # means
    2958,  # jo
    446,   # ke
    304,   # to
    2649,  # tell
]
# One decoded piece per line, same as printing each id individually.
print("\n".join(tokenizer.decode(token_id) for token_id in sample_token_ids))

the
means
jo
ke
to
tell


TODO

- [ ] Compare different tokenizers