In [1]:
import os

os.environ["HF_HOME"] = "/workspace/.hf_cache"
os.environ["HF_DATASETS_CACHE"] = "/workspace/.hf_cache/datasets"
os.environ["TRANSFORMERS_CACHE"] = "/workspace/.hf_cache/transformers"

os.makedirs("/workspace/.hf_cache", exist_ok=True)

In [4]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

model_path = "/workspace/AAIPL/hf_models/models--Qwen--Qwen2.5-14B-Instruct/snapshots/cf98f3b3bbb457ad9e2bb7baf9a0125b6b88caa8"

dataset_path = "synthetic_dataset.json"


In [10]:
with open("synthetic_dataset.json", "r") as f:
    raw = f.read()

print("File length:", len(raw))


File length: 568001


In [11]:
import re

# Remove trailing comma before final ]
fixed = re.sub(r',\s*\]', ']', raw)

# Try loading
import json
data = json.loads(fixed)

print("Loaded objects:", len(data))


Loaded objects: 312


In [12]:
with open("synthetic_dataset_clean.json", "w") as f:
    json.dump(data, f, indent=2)

print("Saved clean dataset.")


Saved clean dataset.


In [14]:

dataset = load_dataset(
    "json",
     data_files="synthetic_dataset_clean.json",
    split="train"
)

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    local_files_only=True
)

tokenizer.pad_token = tokenizer.eos_token

Generating train split: 0 examples [00:00, ? examples/s]

In [18]:

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,  # MI300 â†’ BF16
    device_map="auto",
    local_files_only=True
)

model.config.use_cache = False

`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [19]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj","k_proj","v_proj","o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 25,165,824 || all params: 14,795,199,488 || trainable%: 0.1701


In [20]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    local_files_only=True
)

tokenizer.pad_token = tokenizer.eos_token

def tokenize(example):
    text = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False
    )

    tokens = tokenizer(
        text,
        truncation=True,
        max_length=1024,
        padding="max_length"
    )

    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

dataset = dataset.map(tokenize, remove_columns=["messages"])


Map:   0%|          | 0/312 [00:00<?, ? examples/s]

In [22]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./qwen_lora_output",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=5,
    learning_rate=2e-4,
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,
    optim="adamw_torch",
    report_to="none"
)


In [23]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()


Step,Training Loss
10,3.6878
20,0.3409
30,0.2425
40,0.1977
50,0.191
60,0.1699
70,0.1633
80,0.1669
90,0.1582
100,0.1621


TrainOutput(global_step=100, training_loss=0.5480436038970947, metrics={'train_runtime': 309.3893, 'train_samples_per_second': 5.042, 'train_steps_per_second': 0.323, 'total_flos': 1.3434436989222912e+17, 'train_loss': 0.5480436038970947, 'epoch': 5.0})

In [24]:
model.save_pretrained("qwen_lora_adapter")
tokenizer.save_pretrained("qwen_lora_adapter")


('qwen_lora_adapter/tokenizer_config.json',
 'qwen_lora_adapter/special_tokens_map.json',
 'qwen_lora_adapter/chat_template.jinja',
 'qwen_lora_adapter/vocab.json',
 'qwen_lora_adapter/merges.txt',
 'qwen_lora_adapter/added_tokens.json',
 'qwen_lora_adapter/tokenizer.json')

In [30]:
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    local_files_only=True
)

model = PeftModel.from_pretrained(base_model, "qwen_lora_adapter")

messages = [
    {
        "role": "system",
        "content": "You are a logical reasoning expert. Answer strictly in valid JSON format with no extra commentary."
    },
    {
        "role": "user",
        "content": """Question:
Five friends A, B, C, D, E are sitting in a row...
Choices:
A) A
B) B
C) C
D) D

Respond ONLY in this JSON format:
{
  "answer": "A/B/C/D",
  "reasoning": "brief reasoning under 100 words"
}"""
    }
]

text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True  # IMPORTANT
)

inputs = tokenizer(text, return_tensors="pt").to(model.device)

output = model.generate(
    **inputs,
    max_new_tokens=120,
    do_sample=False,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id
)

decoded = tokenizer.decode(output[0], skip_special_tokens=True)

import re
match = re.search(r'\{[\s\S]*?\}', decoded)
final_output = match.group(0) if match else decoded

print(final_output)


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "answer": "A/B/C/D",
  "reasoning": "brief reasoning under 100 words"
}
