In [25]:
import os
# Pin the process to a single physical GPU. This assignment is deliberately
# placed before `import torch` so the restriction is in the environment when
# torch (and CUDA) are first loaded; the selected card is then addressed as
# device index 0 inside this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '5'  # NOTE(review): machine-specific GPU index
import torch
import pandas as pd
from peft import PeftModel,PeftConfig
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline



In [26]:
# Load the base causal LM, attach the GRPO-trained LoRA adapter, and move the
# combined model to the available device ready for inference.
model_name = "meta-llama/Llama-3.2-3B-Instruct"
adapter_name = "/playpen/jesse/drug_repurpose/grpo_startup/results/20250530_1705/models/llama32-3b-pagerank-partial-baseline-grpo-lora/final_model"
# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
token = os.getenv("HF_TOKEN")

# `token=` replaces the deprecated `use_auth_token=` keyword.
peft_cfg = PeftConfig.from_pretrained(adapter_name, token=token)
# Prefer the base checkpoint recorded in the adapter config; fall back to
# `model_name` only when the config does not name one.
base_name = peft_cfg.base_model_name_or_path or model_name
# The tokenizer is loaded from the adapter directory, not from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(adapter_name, token=token)

base_model = AutoModelForCausalLM.from_pretrained(base_name, torch_dtype="auto", token=token)
# Make the embedding matrix match the tokenizer's vocabulary size before the
# adapter weights are attached.
base_model.resize_token_embeddings(len(tokenizer))

model = PeftModel.from_pretrained(base_model, adapter_name, torch_dtype="auto")

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)
# Inference only: switch every submodule to eval mode so that train-mode-only
# layers (e.g. dropout) cannot perturb the generated answers.
model.eval()

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 42.42it/s]


In [29]:
# Training dataset
input_text = "Question: Is autosomal recessive severe congenital neutropenia due to G6PC3 deficiency an indication for Fluoxetine? directly answer me with $YES$ or $NO$\nANSWER:"
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
print(inputs)

# Sampling is stochastic; seed the RNG so re-running this cell reproduces the
# same answer.
torch.manual_seed(0)
output = model.generate(**inputs, max_new_tokens=1000, do_sample=True, temperature=0.2)

# The generated sequence starts with the prompt tokens. Drop them by position
# instead of string-replacing the prompt in the decoded text: decoding is not
# guaranteed to reproduce the prompt byte-for-byte, and a textual replace would
# also delete the prompt if the model repeated it inside its answer.
prompt_len = inputs["input_ids"].shape[1]
answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
answer

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


{'input_ids': tensor([[128000,  14924,     25,   2209,  47972,  53911,  47862,    535,  15748,
          83066,   2223,  26878,    897,  59386,   4245,    311,    480,     21,
           4977,     18,  48294,    459,  28137,    369,  61626,  91703,     30,
           6089,   4320,    757,    449,    400,  14331,      3,    477,    400,
           9173,  26101,  11954,  40451,     25]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}


'$NO$'

In [30]:
# Testing dataset
input_text = "Question: Is pancreatitis an indication for Spironolactone? directly answer me with $YES$ or $NO$\nANSWER:"
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

# Sampling is stochastic; seed the RNG so re-running this cell reproduces the
# same answer.
torch.manual_seed(0)
output = model.generate(**inputs, max_new_tokens=1000, do_sample=True, temperature=0.2)

# Keep only the newly generated tokens. Slicing by prompt length is robust even
# when the decoded text does not reproduce the prompt exactly, which the
# previous string-replace approach silently relied on.
prompt_len = inputs["input_ids"].shape[1]
answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
answer

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'$NO$'

In [None]:
# Triamcinolone,disease,aspiration pneumonia (disease)