In [1]:
!pip install transformers peft trl bitsandbytes datasets

[0m

In [2]:
import os
import gc
import torch

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
import pandas as pd
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import KTOTrainer
import bitsandbytes as bnb
from datasets import Dataset
import random
from trl import KTOConfig
import numpy as np

In [3]:
random.seed(42)
np.random.seed(42)

In [4]:
model_id = "meta-llama/Llama-3.2-1B"
# token = os.environ('HF_TOKEN')
token = ''

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token, trust_remote_code=True)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [6]:
pairs_df = pd.read_csv("./train_dataset_for_kto.csv")

In [7]:
print(f"Loaded {len(pairs_df)} pairs from dataset")

Loaded 15642 pairs from dataset


In [8]:
user_prompts = [
    "Please create a medical multiple-choice question with four possible answers, only one correct.",
    "Generate a single medical multiple-choice question with exactly one correct answer.",
    "I need a medical multiple-choice question (4 options) with one correct answer.",
    "Write a medical MCQ with four answer choices, only one of which is correct.",
    "Produce a medical multiple-choice question with four options and identify a single correct choice.",
]

In [9]:
eval_size = int(len(pairs_df) * 0.2)

eval_indices = np.random.choice(len(pairs_df), size=eval_size, replace=False)
eval_df = pairs_df.iloc[eval_indices].reset_index(drop=True)
train_df = pairs_df.drop(index=eval_indices).reset_index(drop=True)

print(f"Split dataset into {len(train_df)} training pairs and {len(eval_df)} evaluation pairs")

Split dataset into 12514 training pairs and 3128 evaluation pairs


In [10]:
eval_dataset = Dataset.from_pandas(eval_df)
train_dataset = Dataset.from_pandas(train_df)

In [11]:
def add_prompt(example):
    prompt = random.choice(user_prompts)
    example["prompt"] = prompt
    return example

In [12]:
train_dataset = train_dataset.map(add_prompt)
eval_dataset = eval_dataset.map(add_prompt)

Map:   0%|          | 0/12514 [00:00<?, ? examples/s]

Map:   0%|          | 0/3128 [00:00<?, ? examples/s]

In [13]:
print("\nTraining example:")
print(train_dataset[0])
print("\nEvaluation example:")
print(eval_dataset[0])


Training example:
{'id': 'OIC-155-19-A', 'completion': 'Question: What is the recommended treatment for non-necrotizing bacterial dermohypodermatitis in adults?\r\na) Oral antibiotic therapy for 7 days against Staphylococcus aureus\r\nb) Penicillin allergy, additional measures for adults and children\r\nc) Amoxicillin: 50 mg/kg/day in 3 doses with a maximum of 6 g/day\r\nd) Pristinamycin (1 g x 3/day) or Clindamycin (600 mg x 3/day, up to 600 mg x 4/day if weight > 100 kg)', 'label': True, 'source': 'Llama1b_distractor_quality', 'prompt': 'Please create a medical multiple-choice question with four possible answers, only one correct.'}

Evaluation example:
{'id': 'OIC-173-01-A', 'completion': 'Question: What is the primary mode of transmission for zoonoses?\r\na) Directly from animals to humans\r\nb) Indirectly through vectors such as ticks and fleas\r\nc) Only through contaminated food and water\r\nd) Through airborne pathogens', 'label': True, 'source': 'Llama1b_distractor_quality', 

In [14]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear")

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16
# )

In [15]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    torch_dtype=torch.float16,
    load_in_8bit=True,          # This enables 8-bit quantization
    device_map="auto"
)

model.config.use_cache = False
model = prepare_model_for_kbit_training(model)

model = get_peft_model(model, peft_config)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [16]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}"
    )

print_trainable_parameters(model)

trainable params: 11272192 || all params: 1247086592 || trainable%: 0.90


In [23]:
new_model = "model"

training_args = KTOConfig(
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    remove_unused_columns=True,
    learning_rate=5.0e-06,
    eval_strategy = "steps",   # run evals by step
    eval_steps = 50,
    logging_strategy="steps",
    lr_scheduler_type="cosine",
    num_train_epochs=8,
    save_strategy="steps",
    save_steps=50,
    logging_steps=10,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    warmup_steps=10,
    bf16=True,
    report_to="none",
    beta=0.1,
    max_prompt_length=2048,
    max_length=2048,
    load_best_model_at_end=True
)

In [24]:
kto_trainer = KTOTrainer(
    model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    processing_class=tokenizer,
    peft_config=peft_config
)



Extracting prompt from train dataset:   0%|          | 0/12514 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/12514 [00:00<?, ? examples/s]

Extracting prompt from eval dataset:   0%|          | 0/3128 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/3128 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/12514 [00:00<?, ? examples/s]

Processing tokenized train dataset:   0%|          | 0/12514 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/3128 [00:00<?, ? examples/s]

Processing tokenized eval dataset:   0%|          | 0/3128 [00:00<?, ? examples/s]

Extracting KL train dataset:   0%|          | 0/12514 [00:00<?, ? examples/s]

Processing tokenized train KL dataset:   0%|          | 0/12514 [00:00<?, ? examples/s]

Extracting eval KL dataset:   0%|          | 0/3128 [00:00<?, ? examples/s]

Processing tokenized eval KL dataset:   0%|          | 0/3128 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [25]:
kto_trainer.train()



Step,Training Loss,Validation Loss,Rewards/chosen,Logps/chosen,Logits/chosen,Rewards/rejected,Logps/rejected,Logits/rejected,Rewards/margins,Kl
50,0.5001,0.494101,1.533842,-152.55521,15215297.70279,1.485706,-146.833451,14558489.971014,0.048136,15.093266
100,0.5013,0.478914,0.386862,-164.025004,14497887.688514,0.211955,-159.570957,13703608.055451,0.174907,2.97241
150,0.4998,0.476277,1.477579,-153.117841,15005117.881895,1.281977,-148.870737,14316645.303088,0.195601,13.734534
200,0.5013,0.458288,0.547776,-162.415862,14591456.43608,0.196737,-159.723151,13830924.743541,0.351039,3.731498
250,0.4972,0.459746,1.46113,-153.282345,14751944.680078,1.124329,-150.447227,14163124.022684,0.3368,12.882338
300,0.4994,0.439193,0.701554,-160.878082,14429932.895522,0.177272,-159.917789,13917699.548834,0.524282,4.345872
350,0.5005,0.441738,1.442644,-153.467168,13991078.790396,0.942732,-152.263193,13386826.202899,0.499911,11.848941
400,0.5003,0.421696,0.850085,-159.392785,13424065.868916,0.154815,-160.142358,12966533.242596,0.695269,5.01774
450,0.497,0.425333,1.40167,-153.876916,13448217.915639,0.744254,-154.247972,13024183.087587,0.657416,10.682222
500,0.503,0.408642,1.008457,-157.809053,12678201.811811,0.173632,-159.954198,12371205.323251,0.834825,5.871139



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B.

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B.

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B.

Cannot access gated repo for u

TrainOutput(global_step=784, training_loss=0.4985573246162765, metrics={'train_runtime': 2945.7938, 'train_samples_per_second': 33.985, 'train_steps_per_second': 0.266, 'total_flos': 0.0, 'train_loss': 0.4985573246162765, 'epoch': 8.0})