In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)

mps


In [3]:
model_name = "Qwen/Qwen2-0.5B"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Some parameters are on the meta device because they were offloaded to the disk.


In [55]:
prompt = "上海的天气是"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
input_ids = tokenizer([text], return_tensors="pt").to(device).input_ids

In [65]:
beam_size = 2
max_len = 10
eos_token_id = tokenizer.eos_token_id
all_possible_ans = [(input_ids, 1)]
generate_times = 0

while generate_times < max_len:
    new_all_possible_ans = []
    for idx, score in all_possible_ans:
        if idx[0,-1].item() == eos_token_id:
            new_all_possible_ans.append((idx, score))
            continue
        model_outputs = model(idx)
        scores, indices = torch.topk(model_outputs.logits[0,-1,:], k=beam_size)
        for k in range(beam_size):
            output_ids = torch.cat((idx, indices[k].unsqueeze(0).unsqueeze(0)), dim=-1)
            new_all_possible_ans.append((output_ids, score * scores[k]))
    all_possible_ans = sorted(new_all_possible_ans, key=lambda x:x[1], reverse=True)[:beam_size]
    generate_times += 1
    print(f"完成第{generate_times}次生成")

完成第1次生成
完成第2次生成
完成第3次生成
完成第4次生成
完成第5次生成
完成第6次生成
完成第7次生成
完成第8次生成
完成第9次生成
完成第10次生成


In [67]:
for k in range(beam_size):
    print(tokenizer.batch_decode(all_possible_ans[k][0]))

['<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n上海的天气是<|im_end|>\n<|im_start|>assistant\n根据文章内容，上海的天气可以概括为']
['<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n上海的天气是<|im_end|>\n<|im_start|>assistant\n根据文章内容，上海的天气可以概括如下']
