In [1]:
!pip install -q transformers datasets accelerate peft trl bitsandbytes

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/480.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m399.4/480.6 kB[0m [31m12.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.7/365.7 kB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━

In [2]:
import gc
import json
import os
import re

import pandas as pd
import torch
from datasets import Dataset, DatasetDict
# Unambiguous alias: the bare name `Dataset` is rebound by the torch import below.
from datasets import Dataset as HFDataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from torch.utils.data import DataLoader, Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

# Load preprocessed dataset

In [3]:
# Share link of the preprocessed preference dataset (a CSV hosted on Google Drive).
GOOGLE_DRIVE_DATASET_URL = "https://drive.google.com/file/d/1jlG-__9zwHB_USs5q2D7ipb4SV0TJFBe/view?usp=sharing"

# The file id is the path segment between "/file/d/" and "/view" in the share link.
FILE_ID = re.search(
    pattern="/file/d/(.*)/view",
    string=GOOGLE_DRIVE_DATASET_URL,
).group(1)

# Direct-download endpoint for that file id; pandas reads the CSV straight from the URL.
DOWNLOAD_URL = f"https://drive.usercontent.google.com/download?id={FILE_ID}&export=download&authuser=0&confirm=t"

# The first CSV column is used as the DataFrame index.
preferences_df = pd.read_csv(DOWNLOAD_URL, index_col=0)

In [4]:
# Hold out TEST_SIZE of the rows for evaluation; the rest (shuffled) is the training set.
TEST_SIZE = 0.2
SPLIT_SEED = 0  # one seed for both the hold-out draw and the train shuffle

eval_df = preferences_df.sample(frac=TEST_SIZE, random_state=SPLIT_SEED)

# Rows whose index label was not drawn for evaluation form the training set.
# The shuffle is seeded as well, so the training order is reproducible across runs.
train_df = (
    preferences_df[~preferences_df.index.isin(eval_df.index)]
    .sample(frac=1.0, random_state=SPLIT_SEED)
)

# Load the model

Load the Llama 3 8B model.

In [5]:
from huggingface_hub import login

# Authenticate against the Hugging Face Hub before downloading the model.
# HF_TOKEN is read from the environment when it is set; when it is missing,
# token=None makes login() fall back to its interactive prompt instead of
# stopping the notebook with a bare KeyError.
login(token=os.environ.get("HF_TOKEN"))

### Set up the quantization


This configuration enables 4-bit quantization with the NF4 data type for higher precision, uses FP16 as the compute dtype for faster fine-tuning, and applies double quantization to further reduce memory usage.

In [6]:
# bitsandbytes quantization settings used when loading the base model:
# 4-bit NF4 weights, double quantization enabled, FP16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

In [7]:
# Hub id of the base checkpoint; reused later when reloading the model for inference.
model_id = "meta-llama/Meta-Llama-3-8B"

# Tokenizer and quantized base model (quantization settings come from bnb_config).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Apply trl's chat format to both objects first (the log below shows the embeddings
# being resized), then prepare the quantized model for k-bit training.
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)
model = prepare_model_for_kbit_training(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


# Prepare the dataset

Prepare the dataset for use with the Hugging Face libraries.  
Samples will be formatted as a preference dataset and tokenized.

In [8]:
def chat_template_input(text):
    """Wrap ``text`` as a single-message chat conversation spoken by the assistant.

    Falsy values (``None``, ``""``, ...) are replaced by an empty string so the
    message always carries a ``content`` entry.

    Returns:
        A one-element list holding a ``{"role", "content"}`` message dict.
    """
    content = text or ""
    return [dict(role="assistant", content=content)]

def format_chat_template(row):
    """Add chat-formatted ``chosen`` / ``rejected`` strings to a dataset row.

    ``chosen`` is rendered from the row's ``answer`` and ``rejected`` from its
    ``incorrect_answer``, each as a single assistant message passed through the
    module-level ``tokenizer``'s chat template (text output, not token ids).

    The row is updated in place and returned, as expected by ``Dataset.map``.
    """
    column_sources = (("chosen", "answer"), ("rejected", "incorrect_answer"))
    for target_column, source_column in column_sources:
        messages = chat_template_input(row[source_column])
        row[target_column] = tokenizer.apply_chat_template(messages, tokenize=False)
    return row

# Convert both pandas splits to Hugging Face datasets and add the chat-formatted
# `chosen` / `rejected` columns.
# HFDataset is `datasets.Dataset`: after the import cell the bare name `Dataset`
# refers to `torch.utils.data.Dataset`, which has no `from_pandas`.
train_dataset = HFDataset.from_pandas(train_df, split="train").map(format_chat_template)
eval_dataset = HFDataset.from_pandas(eval_df, split="evaluation").map(format_chat_template)

Map:   0%|          | 0/11985 [00:00<?, ? examples/s]

Map:   0%|          | 0/2996 [00:00<?, ? examples/s]

### Set up LoRA with the ORPO optimization algorithm

This LoRA configuration is designed for fine-tuning a language model by targeting both the attention projections (`q_proj`, `k_proj`, `v_proj`, `o_proj`) and the feed-forward projections (`gate_proj`, `up_proj`, `down_proj`). The adapters are trained with ORPO, which jointly optimizes objectives typically handled by separate stages—supervised fine-tuning (SFT) and alignment techniques like DPO or RLHF—while eliminating the need for a separate reward model.

In [9]:
# LoRA adapter settings: rank 16, alpha 32, 5% dropout, no bias terms trained.
# Adapters are attached to every module name listed in target_modules.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        'up_proj',
        'down_proj',
        'gate_proj',
        'k_proj',
        'q_proj',
        'v_proj',
        'o_proj',
    ],
)

In [13]:
# Training configuration for the ORPO run.
orpo_args = ORPOConfig(
    # Optimisation
    learning_rate=8e-6,
    beta=0.1,
    lr_scheduler_type="linear",
    warmup_steps=10,
    optim="paged_adamw_8bit",
    num_train_epochs=1,
    # Sequence length limits
    max_length=1024,
    max_prompt_length=512,
    # Batching: 2 samples per device x 4 accumulation steps = 8 samples per optimizer step
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    # Evaluation: `eval_strategy` is the current name of the deprecated
    # `evaluation_strategy` argument. eval_steps below 1 is a fraction of the total
    # training steps (the log below shows an evaluation every 300 steps).
    eval_strategy="steps",
    eval_steps=0.2,
    # Logging / outputs
    logging_steps=1,
    report_to="none",
    output_dir="./results/",
)



# Train the model

In [14]:
# Assemble the ORPO trainer from the prepared model, the LoRA settings and both splits.
trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=orpo_args,
)

# Run the fine-tuning, then write the result to ./medllm_supportiv; that directory is
# what the inference section later loads as the adapter.
trainer.train()
trainer.save_model('medllm_supportiv')



Map:   0%|          | 0/11985 [00:00<?, ? examples/s]

Map:   0%|          | 0/11985 [00:00<?, ? examples/s]

Map:   0%|          | 0/11985 [00:00<?, ? examples/s]

Map:   0%|          | 0/2996 [00:00<?, ? examples/s]

Map:   0%|          | 0/2996 [00:00<?, ? examples/s]

Map:   0%|          | 0/2996 [00:00<?, ? examples/s]

  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
300,0.9926,0.994316,2220.0181,1.35,0.675,-0.098528,-0.300392,0.999666,0.201864,-3.003922,-0.985278,-0.046688,-0.721247,0.983747,-0.105686,2.551588
600,0.8516,0.964482,2220.6617,1.349,0.675,-0.095936,-0.365423,0.999666,0.269487,-3.65423,-0.959364,0.025925,-0.5361,0.958628,-0.058535,3.27465
900,0.7847,0.950398,2221.1117,1.349,0.674,-0.09465,-0.394382,0.999666,0.299732,-3.943823,-0.9465,0.077681,-0.451373,0.945821,-0.045767,3.595796
1200,0.932,0.9441,2219.8419,1.35,0.675,-0.094086,-0.412279,0.999666,0.318193,-4.122788,-0.940862,0.136287,-0.431917,0.940148,-0.039521,3.789207


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss,Log Odds Ratio,Log Odds Chosen
300,0.9926,0.994316,2220.0181,1.35,0.675,-0.098528,-0.300392,0.999666,0.201864,-3.003922,-0.985278,-0.046688,-0.721247,0.983747,-0.105686,2.551588
600,0.8516,0.964482,2220.6617,1.349,0.675,-0.095936,-0.365423,0.999666,0.269487,-3.65423,-0.959364,0.025925,-0.5361,0.958628,-0.058535,3.27465
900,0.7847,0.950398,2221.1117,1.349,0.674,-0.09465,-0.394382,0.999666,0.299732,-3.943823,-0.9465,0.077681,-0.451373,0.945821,-0.045767,3.595796
1200,0.932,0.9441,2219.8419,1.35,0.675,-0.094086,-0.412279,0.999666,0.318193,-4.122788,-0.940862,0.136287,-0.431917,0.940148,-0.039521,3.789207




In [None]:
# Release the training model before the fp16 base model is reloaded for inference.
# `trainer` holds its own reference to the model, so both names must go; otherwise
# the weights stay alive and nothing is actually freed.
del trainer, model

# Collect first so the now-unreferenced tensors are destroyed, then hand the cached
# CUDA blocks back to the driver.
gc.collect()
torch.cuda.empty_cache()

# Load the finetuned model for inference

Load the base model and merge the LoRA adapter into it so the result can be used as a regular `transformers` model.

In [134]:
# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
    #load_in_8bit=True
)
base_model, tokenizer = setup_chat_format(base_model, tokenizer)

# Merge adapter with base model
peft_model = PeftModel.from_pretrained(base_model, 'medllm_supportiv')
peft_model = peft_model.merge_and_unload().to('cuda')

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [136]:
# Evaluation rows are served in their original order, 20 at a time.
batch_size = 20
data_loader = DataLoader(eval_dataset, shuffle=False, batch_size=batch_size)

# Text-generation pipeline around the merged model and its tokenizer.
pipe = pipeline(
    task="text-generation",
    model=peft_model,
    tokenizer=tokenizer,
    device_map="auto",
)

In [142]:
#inference = pipe(eval_dataset['question'], do_sample=False, max_new_tokens=150, temperature=None, top_p=None)

In [None]:
# Generate an answer for every evaluation question, batch by batch.
all_results = []
for batch in data_loader:
    questions = batch['question']
    # Greedy decoding (sampling disabled), at most 150 new tokens per question.
    generations = pipe(questions, do_sample=False, max_new_tokens=150, temperature=None, top_p=None)
    all_results.extend(
        {"question": question, "model_answer": generation[0]['generated_text']}
        for question, generation in zip(questions, generations)
    )
    print('.', end='')  # one dot per finished batch
    # Rewrite the checkpoint file after each batch so partial results survive an interruption.
    with open('partial_eval_results.json', 'w') as f:
        json.dump(all_results, f)

In [106]:
# Tabulate the collected question / model_answer records for inspection.
pd.DataFrame(data=all_results)

Unnamed: 0,question,model_answer
0,What is (are) ataxia with oculomotor apraxia ?,What is (are) ataxia with oculomotor apraxia ?...
1,What are the symptoms of Anencephaly ?,What are the symptoms of Anencephaly ? The sym...
2,What are the genetic changes related to D-bifu...,What are the genetic changes related to D-bifu...
3,What are the symptoms of Hypersensitivity Pneu...,What are the symptoms of Hypersensitivity Pneu...
4,What causes Brittle diabetes ?,What causes Brittle diabetes ? Brittle diabete...
5,What is the outlook for Gestational Trophoblas...,What is the outlook for Gestational Trophoblas...
6,What are the symptoms of Gout ?,What are the symptoms of Gout ? Gout is a form...
7,What is (are) succinic semialdehyde dehydrogen...,What is (are) succinic semialdehyde dehydrogen...
8,What are the symptoms of Thalamic degeneration...,What are the symptoms of Thalamic degeneration...
9,What are the treatments for Primary Myelofibro...,What are the treatments for Primary Myelofibro...


In [16]:
# Write the merged model and its tokenizer side by side into one directory.
save_path = "./medllm_supportiv_merged/"
peft_model.save_pretrained(save_directory=save_path)
# Kept as the last expression so the cell still displays the written tokenizer files.
tokenizer.save_pretrained(save_directory=save_path)

('./medllm_supportiv_merged/tokenizer_config.json',
 './medllm_supportiv_merged/special_tokens_map.json',
 './medllm_supportiv_merged/tokenizer.json')