In [19]:
!pip install flash-attention

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting flash-attention
  Downloading flash_attention-1.0.0-py3-none-any.whl (31 kB)
Installing collected packages: flash-attention
Successfully installed flash-attention-1.0.0


# Imports

In [1]:
import pandas as pd
import numpy as np
from datasets import load_dataset, Dataset, DatasetDict, load_from_disk
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer,
    DataCollatorForLanguageModeling,
    DataCollatorWithPadding
)
from peft import PeftModel, get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from transformers import BitsAndBytesConfig
import torch
import wandb

from utils import tokenize_dataset_for_qna

  from .autonotebook import tqdm as notebook_tqdm


# Configs

In [20]:
data_path = "../data/qna/"
train_data_path = data_path + "train.csv"
val_data_path = data_path + "val.csv"

max_len = 512

base_model_path = "../models/phi_pubmed_pretrained_attempt_3/final_pretrained"

model_id = "microsoft/Phi-3.5-mini-instruct"

model_output_dir = "../models/phi_qna_finetuned_attempt_3"

# Hyperparameters

In [16]:
lora_r = 24
lora_alpha = 48
lora_target_modules = ["q_proj", "v_proj", "o_proj"]
batch_size = 32
quantization = None
lora_dropout = 0.05
epochs = 6
learning_rate = 5e-5

# Dataset

In [4]:
prompt_template = """
# Instruction:
Assume you are an excellent doctor. Using your knowledge, answer the question given below.

# Question: {question}

# Answer: """
prompt_template = prompt_template.strip()
print(prompt_template)

# Instruction:
Assume you are an excellent doctor. Using your knowledge, answer the question given below.

# Question: {question}

# Answer:


In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

In [13]:
val_df = pd.read_csv(val_data_path)
train_df = pd.read_csv(train_data_path)

val_set = tokenize_dataset_for_qna(tokenizer, val_df, prompt_template, max_len)
train_set = tokenize_dataset_for_qna(tokenizer, train_df, prompt_template, max_len)

Map:   0%|                                                                                                                                                                                                    | 0/300 [00:00<?, ? examples/s]


NameError: name 'paddint_len' is not defined

In [None]:
# train_set.save_to_disk(data_path + "tokenized")
train_set = load_from_disk(data_path + "tokenized")

In [14]:
train_set[0]

{'question': 'I have had a cyst now for 6 months.6weeks ago it became bigger and I smashed it alittle and alot of thick clear stcky stuff came out of it.Now theres a huge hole...like a sack.it will not heal or fill in and really needs stitches but if you have it fixed...the cyst will come back.Its not painful in the least.(its in between my vag. and anus )Its just a hole.my dad told me when he was younger this same thing happen to him 4 times until he had the sack removed.What is this???I was very scared of cancer but my dad said it will go away when the sack is removed.Im 48 yrs old.Its on my scar where they cut me so I could have my children.thank you Tina Leatherwood',
 'answer': 'Welcome to Chat Doctor It needs to be examined to know whether it is just and abscess or a fistula.  In case of fistula it needs to be Chat Doctor.  After that pain medication to reduce the pain and antibiotic to prevent and check the infection. In case assess if it is small there is no need of stitches, t

In [17]:
wandb.init(
    project="med-qna-finetune",
    name="attempt_3",
    config={
        "model": model_id,
        "lora_r": lora_r,
        "lora_alpha": lora_alpha,
        "batch_size": batch_size,
        "epochs": epochs,
        "quantization": quantization,
        "lora_target_modules": lora_target_modules
    }
)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhasindumadushan325[0m ([33mhasindumadushan325-university-of-peradeniya[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=quantization=="4bit",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [21]:
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    # quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.00it/s]


In [22]:
model.gradient_checkpointing_enable()
# model = prepare_model_for_kbit_training(model)

In [23]:
lora_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    target_modules=lora_target_modules,
    lora_dropout=lora_dropout,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

# Train

In [24]:
training_args = TrainingArguments(
    output_dir=model_output_dir,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    eval_strategy="epoch",  # ✅ eval at each epoch
    save_strategy="epoch",
    logging_steps=50,
    learning_rate=learning_rate,
    fp16=True,
    report_to="wandb",
    run_name="attempt_3",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"
)

In [26]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=val_set,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer, padding=False)
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
trainer.train()

trainer.save_model(model_output_dir + "/final")
tokenizer.save_pretrained(model_output_dir + "/final")

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
You are not running the flash-attention implementation, expect numerical differences.


Epoch,Training Loss,Validation Loss


## Merge model with lora weights

In [None]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

finetuned_model = PeftModel.from_pretrained(base_model, output_model_dir + "/final")
merged_model = finetuned_model.merge_and_unload()

In [None]:
model.save_pretrained(output_model_dir +  "/final_pretrained")