# 加载 model 和 tokenizer

In [3]:
import os

# huggingface_hub reads HF_HOME and HF_ENDPOINT when it is first imported, so
# both variables have to be set BEFORE `transformers` is imported; setting them
# afterwards leaves the default cache directory and endpoint in effect.
# (Restart the kernel if transformers was already imported in this session.)
os.environ['HF_HOME'] = "/root/autodl-fs/pre-trained-models/"
os.environ['HF_ENDPOINT'] = "https://hf-mirror.com"

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto

# Load the tokenizer and the model.
model_name = "Qwen/Qwen2-0.5B-Instruct"
# A tokenizer has no weights, so dtype / device placement options do not apply to it.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The half-precision and device placement options belong on the model.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map=device,
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# chat 模式

In [None]:


# Build a single-turn chat prompt and render it with the model's chat template.
prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

# Pass the whole encoding (input_ids AND attention_mask). Passing only
# input_ids makes generate() guess the mask, which triggers a warning and
# gives wrong results as soon as a batch contains padding.
prompt_plus_completion_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)
# generate() returns prompt + completion; strip the prompt tokens from each row.
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, prompt_plus_completion_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [None]:
# Show, one per line: the rendered prompt, the tokenized inputs, the number of
# generated sequences, the shape of the first completion, and the decoded reply.
print(
    text,
    model_inputs,
    len(generated_ids),
    generated_ids[0].shape,
    response,
    sep="\n",
)

# 一般模式

In [None]:
# Answer labels for a multiple-choice question and the vocabulary id of each.
choices = ["A", "B", "C", "D"]
choice_ids = []
for choice in choices:
    # Encode without special tokens so that index 0 is the label itself even
    # for tokenizers that prepend a BOS token.
    token_ids = tokenizer(choice, add_special_tokens=False)["input_ids"]
    # The scoring below reads one logit per label, so each label must be one token.
    assert len(token_ids) == 1, f"choice {choice!r} is not a single token: {token_ids}"
    choice_ids.append(token_ids[0])
print(choice_ids)

In [None]:
# Plain forward pass over the prompt. Gradients are not needed for scoring, so
# disable autograd to avoid keeping activations alive on the GPU.
with torch.no_grad():
    # Pass the attention mask together with the input ids.
    logits = model(**model_inputs)["logits"]
print(logits)
print(logits.shape)

In [None]:
# Keep only the final position along the second axis of the 3-D logits tensor
# (presumably the sequence axis, i.e. the scores for the next token).
last_token_logits = logits[:, -1]
print(last_token_logits, last_token_logits.shape, sep="\n")

In [None]:
# Select the columns for the answer-label token ids, then move the result off
# the autograd graph and the GPU into a NumPy array.
choice_logits = (
    last_token_logits[:, choice_ids]
    .detach()
    .cpu()
    .numpy()
)
print(choice_logits)

In [None]:
import numpy as np
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    Args:
        x: Array-like of scores (NumPy array, list or tuple).
        axis: Axis to normalise over. Defaults to the last axis, which for a
            1-D input is the whole vector.

    Returns:
        A NumPy array with the same shape as ``x`` whose entries along
        ``axis`` are non-negative and sum to 1.
    """
    scores = np.asarray(x)
    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)
# Probability mass the model assigns to label "A" among the four labels.
choice_probs = softmax(choice_logits[0])
conf = choice_probs[choices.index("A")]
print(conf)
# Predicted label: the one with the highest logit (first one wins on ties).
pred = choices[int(np.argmax(choice_logits[0]))]
print(pred)