In [1]:
import json
import random
import wandb
from datasets import Dataset, DatasetDict, Features, Value, load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score

In [2]:
wandb.init(project="llama-medx-reasoning", name="v3.2-lora-pubmedqa", config={"model": "Llama-medx-v3.2"})

model_name = "skumar9/Llama-medx_v3.2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33measonwangzk[0m ([33measonwangzk-the-university-of-chicago[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/733 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/169 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.2k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/419 [00:00<?, ?B/s]

In [3]:
with open('./ori_pqal.json', 'r') as f:
    ori_data = json.load(f)

In [4]:
data_list = []
for pid, sample in ori_data.items():
    question = sample["QUESTION"]
    context = " ".join(sample["CONTEXTS"])
    final_decision = sample["final_decision"].lower()
    long_answer = sample["LONG_ANSWER"]

    prompt = f"Question: {question}\n\nContext: {context}\n\nAnswer:"
    data_list.append({"question": prompt, "answer": long_answer, "label": final_decision})

In [5]:
random.shuffle(data_list)
split_idx = int(0.8 * len(data_list))
train_data = data_list[:split_idx]
test_data = data_list[split_idx:]

features = Features({
    'question': Value('string'),
    'answer': Value('string'),
    'label': Value('string')
})

dataset = DatasetDict({
    "train": Dataset.from_list(train_data, features=features),
    "test": Dataset.from_list(test_data, features=features)
})

In [6]:
def tokenize_function(examples):
    full_texts = [
        f"Question: {q.strip()}\nAnswer: {a.strip()}"
        for q, a in zip(examples["question"], examples["answer"])
    ]


    tokenized = tokenizer(
        full_texts,
        truncation=True,
        padding="max_length",
        max_length=512
    )


    tokenized["labels"] = [
        [token if token != tokenizer.pad_token_id else -100 for token in seq]
        for seq in tokenized["input_ids"]
    ]

    return tokenized

tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [7]:
tokenized_dataset = tokenized_dataset.remove_columns(["label",'question', 'answer' ])

In [10]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 800
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 200
    })
})

In [8]:
import torch
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    attn_implementation="eager",
)

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"]
)
model = get_peft_model(model, peft_config)
model = prepare_model_for_kbit_training(model)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
print("=== Trainable parameters after prepare_model_for_kbit_training ===")
for name, param in model.named_parameters():
    print(f"{name}: {param.requires_grad}")

=== Trainable parameters after prepare_model_for_kbit_training ===
base_model.model.model.embed_tokens.weight: False
base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight: False
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight: False
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight: False
base_model.model.model.layers.0.self_attn.k_proj.weight: False
base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight: False
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight: False
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight: False
base_model.model.model.layers.0.self_attn.o_proj.weight: False
base_model.model.model.layers.0.mlp.gate_proj.weight: False
base_model.model.model.layers.0.mlp.up_proj.weight: False
base_model.model.model.layers.0.mlp.down_proj.weight: False
base_model.model.model.layers.0.input_layernorm.weight: False
base_model.model.model.layers.0.post_attention_layernorm.

In [10]:
for name, param in model.named_parameters():
    if "lora" in name:
        param.requires_grad = True

print("\n=== Trainable parameters after manual unfreeze ===")
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"✅ {name} — shape: {tuple(param.shape)}")


=== Trainable parameters after manual unfreeze ===
✅ base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight — shape: (16, 4096)
✅ base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight — shape: (4096, 16)
✅ base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight — shape: (16, 4096)
✅ base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight — shape: (1024, 16)
✅ base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight — shape: (16, 4096)
✅ base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight — shape: (4096, 16)
✅ base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight — shape: (16, 4096)
✅ base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight — shape: (1024, 16)
✅ base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight — shape: (16, 4096)
✅ base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight — shape: (4096, 16)
✅ base_model.model

In [12]:
model.gradient_checkpointing_enable()
model.train()

sample = tokenized_dataset["train"][0]
batch = {
    "input_ids": torch.tensor(sample["input_ids"]).unsqueeze(0).to(model.device),
    "attention_mask": torch.tensor(sample["attention_mask"]).unsqueeze(0).to(model.device),
    "labels": torch.tensor(sample["labels"]).unsqueeze(0).to(model.device),
}
outputs = model(**batch)
loss = outputs.loss
print("📌 Loss value:", loss.item())
print("🧠 loss.requires_grad:", loss.requires_grad)
print("🧬 loss.grad_fn:", loss.grad_fn)
try:
    loss.backward()
    print("✅ Backward successful!")
except Exception as e:
    print("❌ Backward failed:", e)

📌 Loss value: 2.053631544113159
🧠 loss.requires_grad: False
🧬 loss.grad_fn: None
❌ Backward failed: element 0 of tensors does not require grad and does not have a grad_fn


In [10]:
training_args = TrainingArguments(
    output_dir="./llama-medx-ori_pqal-lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    logging_dir="./logs",
    learning_rate=2e-4,
    weight_decay=0.01,
    bf16=True,
    save_steps=1000,
    logging_steps=50,
    report_to="wandb",
    run_name="ori_pqal-lora-run",
    evaluation_strategy="no",
    save_strategy="steps",
    load_best_model_at_end=False,
    label_names=["labels"]
)

In [12]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer,
)


  trainer = PeftTrainer(


In [None]:
trainer.train()

In [13]:
import torch

# === 1. 取出一个样本，手动构造 batch ===
sample = tokenized_dataset["train"][0]

batch = {
    "input_ids": torch.tensor(sample["input_ids"]).unsqueeze(0).to("cuda"),
    "attention_mask": torch.tensor(sample["attention_mask"]).unsqueeze(0).to("cuda"),
    "labels": torch.tensor(sample["labels"]).unsqueeze(0).to("cuda"),
}

# === 2. 启用训练模式 ===
model.train()

# === 3. 前向传播 ===
outputs = model(**batch)
loss = outputs.loss

print("📌 Loss value:", loss.item())
print("🧠 loss.requires_grad:", loss.requires_grad)
print("🧬 loss.grad_fn:", loss.grad_fn)

# === 4. 尝试反向传播 ===
try:
    loss.backward()
    print("✅ Backward successful!")
except Exception as e:
    print("❌ Backward failed:", e)

# === 5. 查看可训练参数（确认 LoRA 激活） ===
print("\n🔍 Trainable parameters:")
for name, p in model.named_parameters():
    if p.requires_grad:
        print(f"✅ {name} — shape: {tuple(p.shape)}")



📌 Loss value: 2.253511905670166
🧠 loss.requires_grad: False
🧬 loss.grad_fn: None
❌ Backward failed: element 0 of tensors does not require grad and does not have a grad_fn

🔍 Trainable parameters:
