In [11]:
import os
import gc
import torch

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
import pandas as pd
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import bitsandbytes as bnb
from datasets import Dataset
import random
from trl import DPOConfig
import numpy as np

In [12]:
random.seed(42)
np.random.seed(42)

In [13]:
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct", trust_remote_code=True)

In [14]:
pairs_df = pd.read_csv("all_mcq_pairs.csv")

In [15]:
print(f"Loaded {len(pairs_df)} pairs from dataset")

Loaded 4929 pairs from dataset


In [16]:
user_prompts = [
    "Please create a medical multiple-choice question with four possible answers, only one correct.",
    "Generate a single medical multiple-choice question with exactly one correct answer.",
    "I need a medical multiple-choice question (4 options) with one correct answer.",
    "Write a medical MCQ with four answer choices, only one of which is correct.",
    "Produce a medical multiple-choice question with four options and identify a single correct choice.",
]

In [17]:
eval_size = int(len(pairs_df) * 0.2)

eval_indices = np.random.choice(len(pairs_df), size=eval_size, replace=False)
eval_df = pairs_df.iloc[eval_indices].reset_index(drop=True)
train_df = pairs_df.drop(index=eval_indices).reset_index(drop=True)

print(f"Split dataset into {len(train_df)} training pairs and {len(eval_df)} evaluation pairs")

Split dataset into 3944 training pairs and 985 evaluation pairs


In [18]:
eval_dataset = Dataset.from_pandas(eval_df)
train_dataset = Dataset.from_pandas(train_df)

In [19]:
def add_prompt(example):
    prompt = random.choice(user_prompts)
    example["prompt"] = prompt
    return example

In [20]:
train_dataset = train_dataset.map(add_prompt)
eval_dataset = eval_dataset.map(add_prompt)

Map:   0%|          | 0/3944 [00:00<?, ? examples/s]

Map:   0%|          | 0/985 [00:00<?, ? examples/s]

In [21]:
print("\nTraining example:")
print(train_dataset[0])
print("\nEvaluation example:")
print(eval_dataset[0])


Training example:
{'id': 'OIC-131-17-A', 'chosen': 'Question: Which of the following is NOT a potential cause of HTO (Hypotension on Standing) according to the provided information?\na) Recent changes in medication dosage\nb) Hypothyroidism\nc) High blood pressure\nd) Prolonged bed rest', 'rejected': 'Question: What is a priority etiological diagnosis to be sought in HTO?\na) Hypovolaemia and dehydration\nb) Anaemia\nc) Iatrogenia (medication side effects)\nd) Endocrinopathies', 'chosen_source': 'gemma_9b_distractor_quality', 'rejected_source': 'Llama8B_0.1_distractor_quality', 'prompt': 'Please create a medical multiple-choice question with four possible answers, only one correct.'}

Evaluation example:
{'id': 'OIC-259-06-A', 'chosen': 'Question: Which of the following is NOT a characteristic finding in a patient diagnosed with nephrotic syndrome?\na) Hypoalbuminemia\nb) Hyperglycemia\nc) Proteinuria > 3 g/d\nd) Edema in dependent areas', 'rejected': 'Question: What is the typical ag

In [22]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear")

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16
# )

In [23]:
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    torch_dtype=torch.float16,
    load_in_8bit=True,          # This enables 8-bit quantization
    device_map="auto"
)

model.config.use_cache = False
model = prepare_model_for_kbit_training(model)

model = get_peft_model(model, peft_config)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [24]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}"
    )

print_trainable_parameters(model)

trainable params: 25165824 || all params: 3846245376 || trainable%: 0.65


In [32]:
new_model = "model"

training_args = DPOConfig(
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    remove_unused_columns=True,
    learning_rate=5.0e-06,
    evaluation_strategy="epoch",
    logging_strategy="steps",
    lr_scheduler_type="cosine",
    num_train_epochs=8,
    save_strategy="epoch",
    logging_steps=10,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    warmup_steps=10,
    bf16=True,
    report_to="none",
    beta=0.1,
    max_prompt_length=2048,
    max_length=2048
)



In [33]:
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
    peft_config=peft_config
)



Extracting prompt in train dataset:   0%|          | 0/3944 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/3944 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/3944 [00:00<?, ? examples/s]

Extracting prompt in eval dataset:   0%|          | 0/985 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/985 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/985 [00:00<?, ? examples/s]

In [None]:
dpo_trainer.train()