In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format

# Checkpoint name on the Hugging Face Hub
model_id = "EleutherAI/gpt-neo-1.3B"

# 4-bit (NF4, double-quantized) bitsandbytes settings with bfloat16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Quantized model first, tokenizer second
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # attn_implementation="flash_attention_2",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"  # to prevent warnings

# Switch model and tokenizer to the OAI ChatML chat template;
# remove this step if you start from an already fine-tuned model
model, tokenizer = setup_chat_format(model, tokenizer)

In [None]:
from peft import LoraConfig

# LoRA hyperparameters, following the QLoRA paper and Sebastian Raschka's experiment
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    r=256,
    lora_alpha=128,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Free the memory held by the model loaded earlier in the notebook.
import gc

# Tolerate a missing `model` so that re-running this cell does not raise NameError.
try:
    del model
except NameError:
    pass

# Collect unreachable objects first (reference cycles can keep tensors alive),
# then release the cached CUDA blocks.
gc.collect()
torch.cuda.empty_cache()



In [None]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Local path handed to the PEFT loader
peft_model_id = "./model"

# Load the model together with its PEFT adapter, in float16
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Wrap the model in a text-generation pipeline.
# NOTE(review): `tokenizer` is not created in this cell; it is the object built
# in the first cell of the notebook, so that cell must have been run.
pipe = pipeline(task="text-generation", tokenizer=tokenizer, model=model)

In [None]:
from datasets import load_dataset
from tqdm import tqdm
import json

# Evaluation samples are read from a local JSON file, requested as the "train" split
eval_dataset = load_dataset(
    "json",
    split="train",
    data_files="hospital_test.json",
)

def evaluate_position(sample):
    """Generate an answer for one chat sample and score its "positions" field.

    The first two entries of ``sample["messages"]`` are rendered into a prompt
    with the pipeline tokenizer's chat template. The third entry's "content"
    is the reference answer: a JSON string with "positions" and "explanation"
    keys. Uses the module-level ``pipe`` for generation.

    Args:
        sample: Mapping with a "messages" list of at least three entries, each
            carrying a "content" string.

    Returns:
        1 if the generated "positions" value equals the reference one, else 0.
        0 is also returned (after printing "error format") when the reference
        or the generated text is not valid JSON, or when the generated JSON is
        not an object containing a "positions" key.
    """
    try:
        prompt = pipe.tokenizer.apply_chat_template(
            sample["messages"][:2], tokenize=False, add_generation_prompt=True
        )
        outputs = pipe(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            eos_token_id=pipe.tokenizer.eos_token_id,
            pad_token_id=pipe.tokenizer.pad_token_id,
        )

        # Reference (ground-truth) answer from the evaluation sample
        reference = json.loads(sample["messages"][2]["content"])
        reference_positions = reference["positions"]
        print(reference_positions)
        print(reference["explanation"])

        # Model answer: the pipeline output minus the echoed prompt
        generated_text = outputs[0]["generated_text"][len(prompt):].strip()
        print(f"Generated Answer:{generated_text}")
        generated = json.loads(generated_text)
    except json.JSONDecodeError:
        # Text that cannot be parsed as JSON is counted as a wrong answer
        print("error format")
        return 0

    # Valid JSON that is not an object with "positions" (e.g. a bare list or a
    # dict without the key) is a format error too; it must not abort the run.
    if not isinstance(generated, dict) or "positions" not in generated:
        print("error format")
        return 0

    generated_positions = generated["positions"]
    print(generated_positions)

    # Compare the generated positions with the reference positions
    if reference_positions == generated_positions:
        print("right")
        return 1
    print("wrong")
    return 0

success_rate = []
# Upper bound on the number of evaluated samples; clamped to the dataset size
# so that a smaller test file does not make select() fail on a missing index.
number_of_eval_samples = 127
eval_count = min(number_of_eval_samples, len(eval_dataset))

# Fixed shuffle seed so the same subset is evaluated on every run
eval_subset = eval_dataset.shuffle(seed=42).select(range(eval_count))

# iterate over eval dataset and predict
for s in tqdm(eval_subset):
    success_rate.append(evaluate_position(s))

# compute accuracy (guarding against an empty evaluation set)
if success_rate:
    accuracy = sum(success_rate) / len(success_rate)
    print(f"Accuracy: {accuracy * 100:.2f}%")
else:
    print("Accuracy: n/a (no evaluation samples)")

