In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForImageTextToText, GenerationConfig, pipeline
import torch
import torch.nn as nn

In [2]:
model = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    attn_implementation="eager",
    torch_dtype=torch.bfloat16,
).to(device="mps")
model.eval()

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")

Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered.


In [3]:
prompt = "tommorow is the place for all good good zombies come to the aid"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

In [4]:
with torch.no_grad():
    raw_outputs = model(**inputs)

In [5]:
prob = torch.softmax(raw_outputs.logits, dim=-1)

In [6]:
top_pr, top_id = torch.topk(prob[0, -1, :], 50)

for t_id, t_pr in zip(top_id, top_pr):
    t_str = tokenizer.decode(t_id.item()).strip()
    print(f"'{t_str}': {t_pr.item():.4f}")

'of': 0.5586
',': 0.1240
'.': 0.0664
'!': 0.0356
'...': 0.0229
'.': 0.0216
'': 0.0190
'...': 0.0179
'in': 0.0139
'during': 0.0115
'!"': 0.0074
'!': 0.0070
'.': 0.0062
'!': 0.0058
'...': 0.0055
'you': 0.0051
'when': 0.0051
'..."': 0.0027
'': 0.0021
'if': 0.0019
'and': 0.0017
'?': 0.0014
'with': 0.0013
',': 0.0011
'from': 0.0011
'"': 0.0011
'me': 0.0011
'?': 0.0011
'."': 0.0011
't': 0.0010
'for': 0.0010
'on': 0.0010
',"': 0.0010
'as': 0.0009
'"': 0.0008
'(': 0.0008
'."': 0.0007
'\n': 0.0007
';': 0.0007
'",': 0.0007
'…': 0.0006
':': 0.0005
'is': 0.0005
'at': 0.0005
'}': 0.0005
'through': 0.0005
'"': 0.0004
'today': 0.0004
'': 0.0004
'...': 0.0004


In [7]:
gencfg = GenerationConfig(do_sample=True, temperature=0.7)

In [8]:
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        generation_config=gencfg,
        max_new_tokens=5,
        return_dict_in_generate=True,
        output_scores=True
    )

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [9]:
top_sc, top_id = torch.topk(outputs.scores[0]. squeeze(), 50)
for token_id, score in zip(top_id, top_sc):
    token_str = tokenizer.decode(token_id.item()).strip()
    print(f"'{token_str}': {score.item():.4f}")

'of': 24.4643
',': 22.3214
'.': 21.4286
'!': 20.5357
'...': 19.9107
'.': 19.8214
'': 19.6429
'...': 19.5536
'in': 19.1964
'during': 18.9286
'!"': 18.3036
'!': 18.2143
'.': 18.0357
'!': 17.9464
'...': 17.8571
'you': 17.7679
'when': 17.7679
'..."': 16.8750
'': 16.5179
'if': 16.3393
'and': 16.1607
'?': 15.8929
'with': 15.8036
',': 15.6250
'from': 15.6250
'"': 15.6250
'me': 15.6250
'?': 15.5357
'."': 15.5357
't': 15.4464
'for': 15.4464
'on': 15.4464
',"': 15.4464
'as': 15.3571
'"': 15.1786
'(': 15.1786
'."': 15.0000
'\n': 14.9107
';': 14.8214
'",': 14.8214
'…': 14.6429
':': 14.5536
'is': 14.5536
'at': 14.5536
'}': 14.5536
'through': 14.5536
'"': 14.2857
'today': 14.1964
'': 14.0179
'...': 14.0179


In [50]:
gencfg = GenerationConfig(do_sample=True, temperature=1.4)

In [51]:
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        generation_config=gencfg,
        max_new_tokens=5,
        return_dict_in_generate=True,
        output_scores=True
    )

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [52]:
top_sc, top_id = torch.topk(outputs.scores[1].squeeze(), 50)
for token_id, score in zip(top_id, top_sc):
    token_str = tokenizer.decode(token_id.item()).strip()
    print(f"'{token_str}': {score.item():.4f}")

'to': 14.5536
'in': 13.0357
'.': 12.0536
',': 11.9643
'...': 11.8750
'...': 11.6071
'.': 11.1607
'that': 11.1161
'and': 10.7589
'happening': 10.4464
'are': 10.4464
'!': 10.3571
'': 10.1786
'happen': 10.0446
'coming': 10.0000
'come': 9.9554
'.': 9.9107
'is': 9.9107
',': 9.8661
'...': 9.8214
'...': 9.7321
'from': 9.6429
'!': 9.5536
'about': 9.3304
'happens': 9.1964
'?': 9.1964
'...,': 9.1964
'...': 9.1071
'except': 9.1071
'—': 9.1071
'comes': 9.0179
'for': 9.0179
'must': 8.9732
'you': 8.9732
'should': 8.7500
'will': 8.6161
'?': 8.6161
'minus': 8.5268
'(': 8.4821
'as': 8.4375
'have': 8.3929
'/': 8.3929
''': 8.3929
'now': 8.3482
'living': 8.3036
'...\': 8.3036
'around': 8.3036
';': 8.2143
'related': 8.1696
'at': 8.1696
