In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Local path handed to both loaders below.
model_path = "./output/final_model/qwen-medical-qlora-lr0.0001-bs32-r16"

# ================================
# 1. Load the tokenizer & model
# ================================
# The tokenizer and the model are loaded with the same options:
#   trust_remote_code=True  -> allow custom code shipped with the checkpoint
#   local_files_only=True   -> read from disk only, never contact the Hub
_load_options = {
    "trust_remote_code": True,
    "local_files_only": True,
}

print(f"正在加载分词器 from {model_path}...")
tokenizer = AutoTokenizer.from_pretrained(model_path, **_load_options)

print(f"正在加载模型 from {model_path}...")
model = AutoModelForCausalLM.from_pretrained(model_path, **_load_options)
print("模型加载完成！")

# ================================
# 2. 定义预测函数
# ================================
def predict(text: str) -> str:
    """Sample an answer for ``text`` and return only the newly generated text.

    The prompt is tokenized verbatim, moved to the model's device, and passed
    to ``model.generate`` with sampling enabled (temperature 0.7, top-p 0.9,
    at most 600 new tokens).

    Args:
        text: The patient question, used as the prompt exactly as given.

    Returns:
        The decoded continuation with special tokens removed. The prompt
        itself is not part of the returned string.
    """
    print("正在生成医学专家报告...")
    # NOTE(review): the prompt is fed raw, without a chat template. If the
    # fine-tune was trained on chat-formatted data, build the prompt with
    # tokenizer.apply_chat_template instead -- confirm against training code.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=600,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # new tokens. Drop the prompt so the caller does not get the question
    # echoed back at the start of the answer.
    prompt_length = model_inputs["input_ids"].shape[1]
    answer_ids = generated_ids[0][prompt_length:]

    return tokenizer.decode(answer_ids, skip_special_tokens=True)

# ================================
# 3. 格式化输出为医学报告
# ================================
def format_report(question: str, raw_answer: str) -> str:
    """Wrap a question and the model's answer in a Markdown expert report.

    The report has a title followed by four sections. The first two carry
    ``question`` and ``raw_answer`` unchanged; the last two are fixed
    placeholder sentences, not model output.

    Args:
        question: The patient question shown in the first section.
        raw_answer: The model's answer shown in the second section.

    Returns:
        The Markdown report, with no leading or trailing newline.
    """
    sections = (
        ("**患者问题**", question),
        ("**专家思考**", raw_answer),
        ("**医学机制**", "（模型需要解释相关遗传、病理或生理机制）"),
        ("**临床建议**", "（模型需要总结简明结论，并给出是否需要就医的建议）"),
    )
    # Two spaces before the newline form a Markdown hard line break, so each
    # heading is rendered on its own line directly above its content.
    body = "\n\n".join(
        f"{heading}  \n{content}" for heading, content in sections
    )
    return f"# 🩺 医学专家报告\n\n{body}"

# ================================
# 4. 主程序：交互输入 + 报告输出
# ================================
if __name__ == "__main__":
    # Interactive loop: read a patient question, generate an answer, print
    # the formatted report, and repeat until an exit command is entered.
    exit_commands = {"退出", "exit", "quit"}

    while True:
        try:
            user_input = input("\n请输入患者问题 (输入 '退出' 结束): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt: end the session
            # cleanly instead of surfacing a traceback.
            print("\n程序已结束。")
            break

        if user_input.lower() in exit_commands:
            print("程序已结束。")
            break

        if not user_input:
            # A blank line carries no question; do not send an empty prompt
            # to the model, just ask again.
            continue

        raw_answer = predict(user_input)
        report = format_report(user_input, raw_answer)

        print("\n================= 医学专家报告 =================")
        print(report)
        print("================================================")

正在加载分词器 from output/checkpoint-1084...
正在加载模型 from output/checkpoint-1084...


2025-08-26 16:50:39.921365: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-26 16:50:40.859290: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


模型加载完成！



请输入患者问题 (输入 '退出' 结束):  退出


程序已结束。
