In [None]:
import torch
# The SmolLM2 cell further down moves its model to `device = "cuda"`, so fail
# fast here with a readable message when no CUDA device is visible.
assert torch.cuda.is_available(), (
    "CUDA is not available: this notebook loads SmolLM2 onto a GPU. "
    "Run it on a machine with a CUDA-capable device."
)

In [None]:
import transformers

In [None]:
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Load the ModernBERT masked-language model and its tokenizer.
model_id = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMaskedLM.from_pretrained(model_id)

text = "The capital of France is [MASK]."
inputs = tokenizer(text, return_tensors="pt")

# Inference only: disable autograd so no graph is built or kept alive on `outputs`.
with torch.no_grad():
    outputs = model(**inputs)

# To get predictions for the mask:
# find the position of the [MASK] token in the first (only) sequence ...
masked_index = inputs["input_ids"][0].tolist().index(tokenizer.mask_token_id)
# ... and pick the highest-scoring vocabulary id at that position.
predicted_token_id = outputs.logits[0, masked_index].argmax(dim=-1)
predicted_token = tokenizer.decode(predicted_token_id)
print("Predicted token:", predicted_token)
# Predicted token:  Paris


Predicted token:  Paris


In [None]:
# Position of the first [MASK] token inside the tokenized prompt
# (the same lookup the previous cell stored in `masked_index`).
first_sequence_ids = inputs["input_ids"][0].tolist()
first_sequence_ids.index(tokenizer.mask_token_id)

6

In [None]:
# Prompt made of nine masks: see what the model fills in with almost no context.
text = "[MASK] [MASK] [MASK] [MASK] [MASK] [MASK] [MASK] [MASK] [MASK]."
inputs = tokenizer(text, return_tensors="pt")

# Inference only: disable autograd so no graph is built for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

# To get predictions for the masks:
# the argmax is taken at every position of the sequence, so no per-mask index
# lookup is needed (the previous `masked_index` assignment was never used).
predicted_tokens_id = outputs.logits[0].argmax(dim=-1)
predicted_tokens = tokenizer.decode(predicted_tokens_id)
predicted_tokens

'[CLS]Write the the sentence sentence\n\n to English.[SEP]'

In [None]:
tokenizer.decode?

[0;31mSignature:[0m
[0mtokenizer[0m[0;34m.[0m[0mdecode[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtoken_ids[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mint[0m[0;34m,[0m [0mList[0m[0;34m[[0m[0mint[0m[0;34m][0m[0;34m,[0m [0mForwardRef[0m[0;34m([0m[0;34m'np.ndarray'[0m[0;34m)[0m[0;34m,[0m [0mForwardRef[0m[0;34m([0m[0;34m'torch.Tensor'[0m[0;34m)[0m[0;34m,[0m [0mForwardRef[0m[0;34m([0m[0;34m'tf.Tensor'[0m[0;34m)[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mskip_special_tokens[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclean_up_tokenization_spaces[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0mstr[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Converts a sequence of ids in a string, using the tokenizer and vocabulary with optio

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"

device = "cuda" # for GPU usage or "cpu" for CPU usage
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

messages = [{"role": "user", "content": "What is gravity?"}]
# add_generation_prompt=True asks the chat template to end the prompt with the
# assistant-turn header, so generation starts directly with the answer instead
# of spending new tokens on writing that header itself.
input_text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(input_text)
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)

# generate() warns that it cannot infer the attention mask because the pad
# token equals the eos token. The prompt is a single, unpadded sequence, so
# every position is a real token and the mask is all ones.
attention_mask = torch.ones_like(inputs)

# Sampling is stochastic; fix the seed so the printed answer is repeatable.
torch.manual_seed(0)
outputs = model.generate(
    inputs,
    attention_mask=attention_mask,
    max_new_tokens=50,
    temperature=0.2,
    top_p=0.9,
    do_sample=True,
)
print(tokenizer.decode(outputs[0]))


tokenizer_config.json:   0%|          | 0.00/3.76k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/801k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<|im_start|>system
You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
<|im_start|>user
What is gravity?<|im_end|>

<|im_start|>system
You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
<|im_start|>user
What is gravity?<|im_end|>
<|im_start|>assistant
Gravity is a fundamental force of nature that attracts objects with mass towards each other. It is a result of the interaction between the mass of an object and the distance between it and the center of the Earth. The force of gravity


In [None]:
from time import perf_counter

In [None]:
# Time greedy decoding of 50 new tokens for the chat prompt held in `inputs`.
# `inputs` is a single unpadded sequence, so the attention mask is all ones;
# passing it explicitly avoids generate() having to guess it.
attention_mask = torch.ones_like(inputs)

st = perf_counter()
outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens=50, do_sample=False)
print(perf_counter() - st)
print(tokenizer.decode(outputs[0]))

1.5267718400000376
<|im_start|>system
You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
<|im_start|>user
What is gravity?<|im_end|>
<|im_start|>assistant
Gravity is a fundamental force of nature that attracts objects with mass towards each other. It is a result of the interaction between mass, energy, and space itself. In the context of our universe, gravity is a result of the


In [None]:
# Time greedy decoding of 100 new tokens for the chat prompt held in `inputs`.
# `inputs` is a single unpadded sequence, so the attention mask is all ones;
# passing it explicitly avoids generate() having to guess it.
attention_mask = torch.ones_like(inputs)

st = perf_counter()
outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens=100, do_sample=False)
print(perf_counter() - st)
print(tokenizer.decode(outputs[0]))

3.067973028999859
<|im_start|>system
You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
<|im_start|>user
What is gravity?<|im_end|>
<|im_start|>assistant
Gravity is a fundamental force of nature that attracts objects with mass towards each other. It is a result of the interaction between mass, energy, and space itself. In the context of our universe, gravity is a result of the curvature of spacetime caused by the presence of mass and energy.

Imagine spacetime as a trampoline. When you place a heavy object, like a bowling ball, on the trampoline, it creates a depression in the surface. This depression


In [None]:
# Time greedy decoding of 200 new tokens for the chat prompt held in `inputs`.
# `inputs` is a single unpadded sequence, so the attention mask is all ones;
# passing it explicitly avoids generate() having to guess it.
attention_mask = torch.ones_like(inputs)

st = perf_counter()
outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens=200, do_sample=False)
print(perf_counter() - st)
print(tokenizer.decode(outputs[0]))

6.080845785000292
<|im_start|>system
You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
<|im_start|>user
What is gravity?<|im_end|>
<|im_start|>assistant
Gravity is a fundamental force of nature that attracts objects with mass towards each other. It is a result of the interaction between mass, energy, and space itself. In the context of our universe, gravity is a result of the curvature of spacetime caused by the presence of mass and energy.

Imagine spacetime as a trampoline. When you place a heavy object, like a bowling ball, on the trampoline, it creates a depression in the surface. This depression is caused by the object's mass and the energy it contains. The more massive the object, the larger the depression.

Now, when you move an object, such as a bowling ball, it creates a gravitational pull on the surrounding space. This gravitational pull is what causes the bowling ball to move towards the center of the trampoline. The more massive the object, the 

In [None]:
# `tokenizer` and `model` were rebound to the SmolLM2 causal LM (moved to
# `device`) by the generation cells above, so on a top-to-bottom run this
# masked-LM experiment would be fed to the wrong model with CPU inputs.
# Restore ModernBERT first; `model_id` still holds its checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMaskedLM.from_pretrained(model_id)

text = "[MASK] [MASK] [MASK] [MASK] [MASK] [MASK] [MASK] [MASK] [MASK]."
inputs = tokenizer(text, return_tensors="pt")

# Time a single forward pass only (tokenization and model loading excluded).
st = perf_counter()
with torch.no_grad():  # inference only: no autograd graph
    outputs = model(**inputs)
# Prints the sequence length in tokens followed by the elapsed seconds.
print(inputs["input_ids"].shape[1], perf_counter() - st)

# To get predictions for the masks:
# the argmax is taken at every position, so no per-mask index lookup is needed
# (the previous `masked_index` assignment was never used).
predicted_tokens_id = outputs.logits[0].argmax(dim=-1)
predicted_tokens = tokenizer.decode(predicted_tokens_id)
predicted_tokens

12 0.07136732900016796


'[CLS]Write the the sentence sentence\n\n to English.[SEP]'

In [None]:
# Same forward-pass timing, now with a prompt of 100 consecutive masks.
text = "[MASK]" * 100
inputs = tokenizer(text, return_tensors="pt")

# Time a single forward pass only (tokenization excluded).
st = perf_counter()
with torch.no_grad():  # inference only: no autograd graph
    outputs = model(**inputs)
# Prints the sequence length in tokens followed by the elapsed seconds.
print(inputs["input_ids"].shape[1], perf_counter() - st)

# To get predictions for the masks:
# the argmax is taken at every position, so no per-mask index lookup is needed
# (the previous `masked_index` assignment was never used).
predicted_tokens_id = outputs.logits[0].argmax(dim=-1)
predicted_tokens = tokenizer.decode(predicted_tokens_id)
predicted_tokens

102 0.19231256000011854


'[CLS]\n\n\n\n\n[CLS][CLS]\n\n\n\n\n\n\n\n\n\n\n[SEP][SEP][SEP][SEP][SEP][SEP][SEP][SEP][SEP][SEP]\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n....\n\n\n\n\n..........................\n\n....\n\n..[SEP]'

In [None]:
# Same forward-pass timing, with 25 repetitions of a short masked greeting.
text = "Hi, [MASK]!" * 25
inputs = tokenizer(text, return_tensors="pt")

# Time a single forward pass only (tokenization excluded).
st = perf_counter()
with torch.no_grad():  # inference only: no autograd graph
    outputs = model(**inputs)
# Prints the sequence length in tokens followed by the elapsed seconds.
print(inputs["input_ids"].shape[1], perf_counter() - st)

# To get predictions for the masks:
# the argmax is taken at every position, so no per-mask index lookup is needed
# (the previous `masked_index` assignment was never used).
predicted_tokens_id = outputs.logits[0].argmax(dim=-1)
predicted_tokens = tokenizer.decode(predicted_tokens_id)
predicted_tokens

102 0.17626123499985624


'[CLS]Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi!Hi, Hi![SEP]'