# Fine-tune on out-of-scope negative samples

Teach the model to decline questions that fall outside its domain (out-of-scope questions).

## Imports

In [None]:
import pandas as pd
import numpy as np
from datasets import load_dataset, Dataset, DatasetDict, load_from_disk
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer,
    DataCollatorForLanguageModeling,
    DataCollatorWithPadding
)
from peft import PeftModel, get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from transformers import BitsAndBytesConfig
import torch
import wandb

from tokenize import tokenize_dataset_for_domain_bound_qna
from prompt_templates import qna_prompt_template as prompt_template

## Configs

In [None]:
# Run identifier: reused as the W&B run name and as the suffix of the
# output directory.
attempt = "attempt_10"

# Directory holding the train/validation CSV files.
data_path = "../data/domain_bound_data/v7/"
train_data_path = f"{data_path}train.csv"
val_data_path = f"{data_path}val.csv"

# Length argument handed to the tokenization helper.
max_len = 512

# Checkpoint that is loaded for fine-tuning and later merged with the adapter.
base_model_path = "../models/phi_qna_finetuned_attempt_5/final_merged"

# Model identifier recorded in the W&B run config.
model_id = "microsoft/Phi-3.5-mini-instruct"

# Everything this run writes (checkpoints, adapter, merged model) goes here.
model_output_dir = "../models/phi_domain_bound_qna_finetuned_" + attempt

# Hyperparameters

In [None]:
# LoRA adapter settings.
lora_r = 32
lora_alpha = 64
lora_dropout = 0.05
lora_target_modules = ["qkv_proj"]

# Quantization mode: "4bit" switches on 4-bit loading in the bitsandbytes
# config; None leaves it off.
quantization = None

# Optimisation schedule.
batch_size = 32
epochs = 3
learning_rate = 5e-6
learning_rate_scheduler = "linear"
warmup_steps = 200

# Dataset

In [None]:
# The tokenizer is loaded from the same checkpoint directory as the model
# that gets fine-tuned.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_path,
    trust_remote_code=True,
)

In [None]:
# Columns handed to the tokenization helper.
qna_columns = ["question", "answer", "class"]

# Read the raw validation and training CSVs.
val_df = pd.read_csv(val_data_path)
train_df = pd.read_csv(train_data_path)

# Tokenize both splits with the same prompt template and length setting.
val_set = tokenize_dataset_for_domain_bound_qna(
    tokenizer,
    val_df[qna_columns],
    prompt_template,
    max_len,
)
train_set = tokenize_dataset_for_domain_bound_qna(
    tokenizer,
    train_df[qna_columns],
    prompt_template,
    max_len,
)

In [None]:
# Display the tokenized training example at index 1 as a quick sanity check.
train_set[1]

In [None]:
# Persist the tokenized training split under data_path + "tokenized".
# Only train_set is written here; val_set is not saved.
train_set.save_to_disk(data_path + "tokenized")
# To reuse the saved copy instead of re-tokenizing, swap in:
# train_set = load_from_disk(data_path + "tokenized")

In [None]:
# Start the Weights & Biases run for this attempt.
# The config records every hyperparameter set in the cells above, so a run
# can be compared and reproduced from its W&B page. "model" is the model
# identifier; "base_model_path" is the checkpoint that is actually loaded.
wandb.init(
    project="med-domain_bound_qna_finetune",
    name=attempt,
    config={
        "model": model_id,
        "base_model_path": base_model_path,
        "max_len": max_len,
        "lora_r": lora_r,
        "lora_alpha": lora_alpha,
        "lora_dropout": lora_dropout,
        "lora_target_modules": lora_target_modules,
        "batch_size": batch_size,
        "epochs": epochs,
        "quantization": quantization,
        "learning_rate": learning_rate,
        "learning_rate_scheduler": learning_rate_scheduler,
        "warmup_steps": warmup_steps,
    },
)

In [None]:
# bitsandbytes settings. 4-bit loading is enabled only when the
# `quantization` hyperparameter equals "4bit".
bnb_config = BitsAndBytesConfig(
    load_in_4bit=(quantization == "4bit"),
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Load the checkpoint that will be fine-tuned.
# Quantized loading is currently switched off; to enable it, also pass
# quantization_config=bnb_config.
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    trust_remote_code=False,
    device_map="auto",
)

In [None]:
# Turn on gradient checkpointing for the loaded model.
model.gradient_checkpointing_enable()
# Left disabled; presumably only needed when the model is loaded quantized
# (see bnb_config) - TODO confirm before re-enabling.
# model = prepare_model_for_kbit_training(model)

In [None]:
# Describe the LoRA adapter using the hyperparameters set earlier.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=lora_target_modules,
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
)

# Wrap the loaded model with the adapter; `model` now refers to the PEFT model.
model = get_peft_model(model, lora_config)

# Train

In [None]:
training_args = TrainingArguments(
    # Output location and experiment tracking.
    output_dir=model_output_dir,
    run_name=attempt,
    report_to="wandb",
    logging_steps=25,
    # Batch sizes and optimisation schedule.
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    learning_rate=learning_rate,
    lr_scheduler_type=learning_rate_scheduler,
    warmup_steps=warmup_steps,
    fp16=True,
    # Evaluate and checkpoint once per epoch, then reload the checkpoint
    # with the best eval_loss when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    label_names=["labels"],
)

In [None]:
# padding=False tells the collator not to pad batches; this presumably relies
# on the tokenization helper producing equal-length examples - TODO confirm.
collator = DataCollatorWithPadding(tokenizer, padding=False)

# Assemble the trainer from the PEFT model, both tokenized splits and the
# training arguments.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=val_set,
    tokenizer=tokenizer,
    data_collator=collator,
)

In [None]:
# Run fine-tuning.
trainer.train()

# Write the trained model and the tokenizer to <model_output_dir>/final.
trainer.save_model(f"{model_output_dir}/final")
tokenizer.save_pretrained(f"{model_output_dir}/final")

## Merge the LoRA weights into the base model

In [None]:
# Directory the training cell saved the adapter and tokenizer to.
adapter_dir = model_output_dir + "/final"

# Reload a fresh copy of the base checkpoint.
# Quantized loading stays off here as well (no quantization_config is passed).
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    trust_remote_code=False,
    device_map="auto",
)

# Attach the saved adapter, then merge it into the base weights.
finetuned_model = PeftModel.from_pretrained(base_model, adapter_dir)
merged_model = finetuned_model.merge_and_unload()

# Tokenizer that was saved alongside the adapter.
tokenizer = AutoTokenizer.from_pretrained(adapter_dir, trust_remote_code=True)

In [None]:
# Write the merged model (safetensors format) and its tokenizer to
# <model_output_dir>/final_merged.
merged_model.save_pretrained(
    f"{model_output_dir}/final_merged",
    safe_serialization=True,
)
tokenizer.save_pretrained(f"{model_output_dir}/final_merged")