### Cài đặt thư viện và tải bộ dữ liệu

In [None]:
# Install libs
!pip install-q unsloth==2025.3.15 datasets==3.5.0
!pip install transformers bitsandbytes

from datasets import load_dataset

# Fetch every available split of the MedMCQA dataset from the Hugging Face Hub.
DATASET_ID = "openlifescienceai/medmcqa"
ds = load_dataset(DATASET_ID)

# Drop the "test" split so the preprocessing below never touches it.
# NOTE(review): presumably because that split has no usable answer labels --
# confirm against the dataset card.
del ds["test"]

### Modeling

In [None]:
from unsloth import FastLanguageModel

# Maximum number of tokens per sequence handed to the model loader.
max_seq_length = 2048

# Load the pre-quantized 4-bit Llama 3.2 1B base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,  # None lets the loader choose the compute dtype
)

# Wrap the base model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "up_proj",
        "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
    random_state=42,
    loftq_config=None,
)

# print_trainable_parameters() prints its summary itself and returns None,
# so wrapping it in print() would only add a stray "None" line.
model.print_trainable_parameters()

### Preprocessing

In [None]:
# Prompt template: the first placeholder receives the question, the second the
# rendered list of choices. The answer is appended after "### Answer:".
data_prompt = """Choose the correct option for the following question.

### Question:
{}

### Choice:
{}

### Answer:
"""

# Maps the integer in the "cop" column to its option letter.
id2label = {
    0: 'A',
    1: 'B',
    2: 'C',
    3: 'D'
}


def formatting_prompt(examples):
    """Build one training text per example in a batched dataset slice.

    Each text is the filled-in ``data_prompt`` followed by the gold answer,
    written as ``"<letter> <option text>"`` (for example ``"D some option"``).
    The answer has to be part of the text: without it the model would only
    ever see prompts and never the completion it is supposed to learn.

    Args:
        examples: Mapping of column name to a list of values, as passed by
            ``Dataset.map(..., batched=True)``. Reads the columns
            ``question``, ``opa``, ``opb``, ``opc``, ``opd`` and ``cop``.

    Returns:
        A dict with a single key ``"text"`` holding the list of formatted
        strings, one per input example.

    Raises:
        KeyError: If a ``cop`` value is not a key of ``id2label``.
    """
    rows = zip(
        examples["question"],
        examples["opa"],
        examples["opb"],
        examples["opc"],
        examples["opd"],
        examples["cop"],
    )

    texts = []
    for question, opa, opb, opc, opd, cop in rows:
        # Look the letter up first so an unlabeled row fails loudly with a
        # KeyError instead of silently indexing the options tuple.
        letter = id2label[cop]
        answer = f"{letter} {(opa, opb, opc, opd)[cop]}"

        choices = f"A. {opa}. B. {opb}. C. {opc}. D. {opd}."
        # NOTE(review): no end-of-sequence token is appended here; confirm the
        # trainer adds one, otherwise generation may not stop after the answer.
        texts.append(data_prompt.format(question, choices) + answer)
    return {"text": texts}

# Add a "text" column to every remaining split, formatting examples in batches.
process_ds = ds.map(
    function=formatting_prompt,
    batched=True,
)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Train in bfloat16 when the GPU supports it, otherwise fall back to float16.
use_bf16 = is_bfloat16_supported()

# Evaluation, checkpointing and logging all share one step interval, so every
# saved checkpoint has a matching evaluation for load_best_model_at_end.
step_interval = 50

args = TrainingArguments(
    # Output locations
    output_dir="med-mcqa-llama-3.2-1B-4bit-lora",
    logging_dir="logs",
    # Optimisation
    optim="adamw_8bit",
    learning_rate=3e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    weight_decay=0.01,
    # Batch size and schedule
    per_device_train_batch_size=64,
    gradient_accumulation_steps=16,
    num_train_epochs=2,
    # Evaluation / checkpoint / logging cadence
    eval_strategy="steps",
    save_strategy="steps",
    logging_strategy="steps",
    eval_steps=step_interval,
    save_steps=step_interval,
    logging_steps=step_interval,
    save_total_limit=1,
    load_best_model_at_end=True,
    # Mixed precision
    bf16=use_bf16,
    fp16=not use_bf16,
    # Reproducibility
    seed=0,
)

# Supervised fine-tuning on the formatted "text" column: train on the "train"
# split and evaluate on the "validation" split.
train_split = process_ds["train"]
eval_split = process_ds["validation"]

trainer = SFTTrainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    dataset_text_field="text",
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Run the training loop configured by `args`.
trainer.train()

### Save checkpoints

In [None]:
from huggingface_hub import login

import os

# Take the access token from the HF_TOKEN environment variable instead of
# hard-coding a secret in the notebook. If the variable is unset, login()
# receives None and asks for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

# Save the LoRA adapter locally, together with the tokenizer so the
# checkpoint directory can be reloaded on its own.
model.save_pretrained("unsloth-llama-traned")
tokenizer.save_pretrained("unsloth-llama-traned")

# Target Hub repository: replace the user-name placeholder with your account.
PEFT_MODEL = "your_huggingface_user_name/Llama-3.2-1B-bnb-4bit-MedMCQA"

# `use_auth_token` is deprecated; `token=True` reuses the credentials stored
# by login() above.
model.push_to_hub(PEFT_MODEL, token=True)
tokenizer.push_to_hub(PEFT_MODEL, token=True)

### Inference

In [None]:
from transformers import pipeline

# Option 1: reload the fine-tuned adapter with Unsloth and call generate().
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=PEFT_MODEL,
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)
FastLanguageModel.for_inference(model)
# NOTE(review): the explicit model.to("cuda") was dropped. The loader is
# expected to place the 4-bit model on the GPU already, and some
# transformers/bitsandbytes versions refuse .to() on quantized models --
# confirm on the target environment.

# Feed only the prompt part (everything up to and including "### Answer:\n"),
# so the model never sees a gold answer that may follow the marker. If the
# marker is absent, the text is used unchanged.
answer_marker = "### Answer:\n"
sample_text = process_ds["validation"][0]["text"]
prompt_head, found_marker, _ = sample_text.partition(answer_marker)
input_text = prompt_head + found_marker

inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
prompt_length = inputs["input_ids"].shape[1]

# Pass the attention mask along with the input ids, and bound the number of
# *new* tokens: max_length would also count the prompt tokens, leaving little
# or no room for the answer on long questions.
output = model.generate(**inputs, max_new_tokens=128)

# Decode only the tokens generated after the prompt.
generated_text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
print(f"Answer: {generated_text}")

# Example output:
# Answer: D Local anesthesia is effective only when the nerve is not covered by myelin sheath\n

# Option 2: use the transformers text-generation pipeline with a published
# checkpoint.
generator = pipeline("text-generation", model="thainq107/med-mcqa-llama-3.2-1B-4bit-lora")

# A list input yields one result list per prompt; [0] selects the first one.
output = generator([input_text], max_new_tokens=128, return_full_text=False)[0]
print(output)

# Example output:
# [{"generated_text": "D Local anesthesia is effective only when the nerve is not covered by myelin sheath\n"}]