In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig

# Checkpoint to fine-tune; passed unchanged to both `from_pretrained` calls below.
model_id = "qwen3"

# bitsandbytes settings: load weights in 4-bit NF4, run computation in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)


model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Register '<PAD>' as the tokenizer's padding token.
tokenizer.add_special_tokens({'pad_token': '<PAD>'})

# If '<PAD>' was new to the vocabulary it gets the next free id, which can lie
# past the last row of the embedding matrix. Looking that id up would fail at
# the first padded batch, so grow the embeddings when (and only when) the
# tokenizer now holds more ids than the model has rows.
embedding_rows = model.get_input_embeddings().weight.shape[0]
if len(tokenizer) > embedding_rows:
    model.resize_token_embeddings(len(tokenizer))

In [None]:
# LoRA adapter definition for causal-LM fine-tuning.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Adapter capacity and regularisation.
    r=16,
    lora_dropout=0.05,
    bias="none",
    # Projections that receive low-rank adapters.
    target_modules=["q_proj", "v_proj", "up_proj", "down_proj"],
    # Trained in full (not low-rank) and saved together with the adapter.
    modules_to_save=["lm_head"],
)

# Inject the adapter into the already-loaded model.
model.add_adapter(lora_config)

In [None]:
from datasets import load_dataset
# Read the local JSON file with the generic "json" loader and keep its train split.
train_dataset = load_dataset(
    path="json",
    data_files="train_data.json",
    split='train',
)
# Quick sanity check on the dataset's dimensions.
print(train_dataset.shape)

In [None]:
from transformers import TrainingArguments

# Hyperparameters a reader is most likely to tune, kept as named values so the
# `TrainingArguments` call below stays declarative.
per_device_train_batch_size = 4
gradient_accumulation_steps = 4  # 4 x 4 = 16 sequences per optimizer step, per device
optim = "paged_adamw_32bit"
logging_steps = 10
learning_rate = 1e-4
max_grad_norm = 0.3
num_train_epochs = 2
# NOTE(review): the "constant" scheduler does not apply warmup, so `warmup_ratio`
# has no effect as written. Switch to "constant_with_warmup" if the 3% warmup is
# actually wanted -- left unchanged here to preserve the current training behaviour.
warmup_ratio = 0.03
lr_scheduler_type = "constant"

training_arguments = TrainingArguments(
    # Passed explicitly: older transformers releases require `output_dir` and
    # fail at construction without it. The value matches the default used by
    # newer releases, so checkpoints land in the same place either way.
    output_dir="trainer_output",
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    num_train_epochs=num_train_epochs,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    # Recompute activations in the backward pass to reduce memory use.
    gradient_checkpointing=True,
)


In [None]:
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

# Marker that separates the prompt from the completion in each training text;
# the completion-only collator uses it to restrict the loss to the completion.
response_template = "### ASSISTANT:"
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
)

# Supervised fine-tuning run: model, arguments and data all come from earlier cells.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    data_collator=collator,
    train_dataset=train_dataset,
)

In [None]:
# Run the fine-tuning loop with the trainer configured in the previous cell.
trainer.train()

In [None]:
# Smoke test: sample a short completion for an empty user turn, using the same
# "### ASSISTANT:" marker the training collator keys on.

# `trainer.train()` leaves the model in training mode. Switch to eval so LoRA
# dropout is disabled and the KV cache requested below is actually used.
model.eval()

# Place the inputs on whatever device the model lives on rather than assuming 'cuda'.
model_inputs = tokenizer("### USER: \n### ASSISTANT: ", return_tensors="pt").to(model.device)

# `early_stopping` is a beam-search option; with sampling and a single beam it
# only triggers a configuration warning, so it is omitted.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=15,
    temperature=0.5,
    use_cache=True,
    do_sample=True,
)

# Decode the single returned sequence (prompt + continuation) without special tokens.
out = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(out)

In [None]:
# Persist the model first, then the tokenizer, into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./qwen-ccb")