<a href="https://colab.research.google.com/github/david-meltzer/LLMs/blob/main/training/colab/DPO/fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [1]:
from google.colab import drive
drive.mount('/content/drive')

%cd drive/MyDrive/LLMs/Fine-tuning/DPO

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/LLMs/Fine-tuning/DPO


In [2]:
!pip install peft==0.4.0 -qqq
!pip install bitsandbytes==0.41.1 -qqq
!pip install safetensors>=0.3.1 -qqq
#!pip install -U trl
!pip install wandb -qqq
!pip install tokenizers>=0.13.3 -qqq
!pip install -U transformers -qqq
!pip install accelerate==0.21.0 -qqq
!pip install git+https://github.com/huggingface/trl -qqq

  Preparing metadata (setup.py) ... [?25l[?25hdone


In [3]:
import os
#from dataclasses import dataclass, field
#from typing import Optional

import warnings
from collections import defaultdict
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
import datasets
from datasets import Dataset, load_dataset
import transformers
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          DataCollator,
                          PreTrainedModel,
                          PreTrainedTokenizerBase,
                          Trainer,
                          TrainingArguments,
                          DataCollatorForLanguageModeling,
                          BitsAndBytesConfig)

from transformers.trainer_callback import TrainerCallback

import gc

import os
from google.colab import runtime
import pandas as pd

import accelerate
import bitsandbytes as bnb
import wandb
from peft import (LoraConfig,
                  get_peft_model,
                  prepare_model_for_kbit_training,
                  PeftModel,
                  PeftConfig)
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from datetime import datetime
from huggingface_hub import login

from peft.tuners.lora import LoraLayer

from tqdm import tqdm

import trl
from trl import DPOTrainer
from trl.models import create_reference_model
from trl.import_utils import is_peft_available
from trl.trainer.dpo_trainer import DPODataCollatorWithPadding, disable_dropout_in_model, pad_to_length

from huggingface_hub import login

  warn("The installed version of bitsandbytes was compiled without GPU support. "


/usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


In [4]:
# Authenticate with the Hugging Face Hub (`login` is imported from
# `huggingface_hub` in the imports cell); in a notebook this renders a widget.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Definitions

In [5]:
def chars_token_ratio(dataset, tokenizer, nb_examples=400):
    """
    Estimate how many characters correspond to one token, on average.

    At most `nb_examples` samples are drawn from the start of `dataset`; each
    is rendered with `prepare_sample_text` and tokenized with `tokenizer`.
    Returns the total character count divided by the total token count.
    """
    char_count = 0
    token_count = 0
    # Pairing with range() caps the iteration at nb_examples samples.
    limited_samples = zip(range(nb_examples), iter(dataset))
    for _, sample in tqdm(limited_samples, total=nb_examples):
        sample_text = prepare_sample_text(sample)
        char_count += len(sample_text)
        # Fast tokenizers expose the tokens on the encoding object; other
        # tokenizers go through `tokenize`.
        if tokenizer.is_fast:
            sample_tokens = tokenizer(sample_text).tokens()
        else:
            sample_tokens = tokenizer.tokenize(sample_text)
        token_count += len(sample_tokens)

    return char_count / token_count

def prepare_sample_text(example):
    """Render one dataset sample as a single prompt/response string.

    Reads `example['question']` and `example['response_j']` and places them
    after the "### Human:" and "### Assistant:" markers respectively.
    """
    question = example['question']
    response = example['response_j']
    return f"### Human: {question}\n ### Assistant: {response}"

def formatting_prompts_func(example):
    """Format a batch of question/answer pairs into prompt strings.

    `example` maps 'question' and 'answer' to index-aligned sequences; one
    "### Human: ... ### Assistant: ..." string is returned per question.
    """
    questions = example['question']
    # Answers are looked up by position, so a shorter 'answer' column raises
    # IndexError instead of being silently truncated.
    return [
        f"### Human: {questions[idx]}\n ### Assistant: {example['answer'][idx]}"
        for idx in range(len(questions))
    ]


def find_all_linear_names(model):
    """Collect the leaf names of every `bnb.nn.Linear4bit` module in `model`.

    The leaf name is the last dot-separated component of the module's
    qualified name. "lm_head" is excluded from the result. Returns a list of
    unique names (order follows set iteration and is not guaranteed).
    """
    leaf_names = {
        qualified_name.split(".")[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, bnb.nn.Linear4bit)
    }

    # needed for 16-bit
    leaf_names.discard("lm_head")
    return list(leaf_names)


def create_peft_model(model,
                      r=64,
                      lora_alpha=16,
                      lora_dropout=0.1,
                      bias='none',
                      task_type='CAUSAL_LM',
                      gradient_checkpointing=True,
                      bf16=True):
    """Wrap `model` with LoRA adapters and adjust module dtypes.

    Args:
        model: model to adapt; it is passed through
            `prepare_model_for_kbit_training` first.
        r, lora_alpha, lora_dropout, bias, task_type: forwarded unchanged to
            `LoraConfig`.
        gradient_checkpointing: forwarded to `prepare_model_for_kbit_training`;
            when true, `model.gradient_checkpointing_enable()` is also called.
        bf16: when true, LoRA layers (and float32 lm_head / embed_tokens
            modules) are cast to bfloat16.

    Returns:
        The model returned by `get_peft_model`, after the dtype adjustments.
    """
    # Prepare the k-bit model for training.
    model = prepare_model_for_kbit_training(
        model, use_gradient_checkpointing=gradient_checkpointing
    )
    if gradient_checkpointing:
        model.gradient_checkpointing_enable()

    # Every 4-bit linear layer becomes a LoRA target.
    modules = find_all_linear_names(model)
    print(f"Found {len(modules)} modules to quantize: {modules}")

    lora_settings = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        target_modules=modules,
        lora_dropout=lora_dropout,
        bias=bias,
        task_type=task_type,
    )
    model = get_peft_model(model, lora_settings)

    # Per-module dtype adjustments. The three checks are independent and run
    # in this order for every module, so a later cast can override an earlier one.
    for layer_name, layer in model.named_modules():
        # LoRA layers go to bfloat16 when bf16 training is requested.
        if isinstance(layer, LoraLayer) and bf16:
            layer = layer.to(torch.bfloat16)

        # Anything with "norm" in its name is kept in float32.
        if "norm" in layer_name:
            layer = layer.to(torch.float32)

        # Output head / input embeddings: downcast only if currently float32.
        is_head_or_embedding = "lm_head" in layer_name or "embed_tokens" in layer_name
        if is_head_or_embedding and hasattr(layer, "weight"):
            if bf16 and layer.weight.dtype == torch.float32:
                layer = layer.to(torch.bfloat16)

    model.print_trainable_parameters()
    return model

class PeftSavingCallback(TrainerCallback):
    """Trainer callback that writes the model via `save_pretrained` on each save.

    After saving into the step's checkpoint directory, a `pytorch_model.bin`
    file found there is deleted.
    """

    def on_save(self, args, state, control, **kwargs):
        """Save `kwargs["model"]` into `<output_dir>/checkpoint-<global_step>`."""
        ckpt_dir = os.path.join(args.output_dir, f"checkpoint-{state.global_step}")
        kwargs["model"].save_pretrained(ckpt_dir)

        weights_file = "pytorch_model.bin"
        if weights_file in os.listdir(ckpt_dir):
            os.remove(os.path.join(ckpt_dir, weights_file))

# Dataset

In [6]:
def chosen_rejected(example):
    """Split the first two answers of `example` into a preference pair.

    The answer whose score is strictly higher becomes 'chosen' and the other
    'rejected'. On a tie the second answer is 'chosen'.
    """
    scores = example['answers.score']
    texts = example['answers.text']

    # Only a strictly greater first score makes answer 0 the winner.
    winner, loser = (0, 1) if scores[0] > scores[1] else (1, 0)
    return {'chosen': texts[winner], 'rejected': texts[loser]}

def format_prompt(example):
    """Build the generation prompt for one sample.

    Reads `example['title_body']` and returns it after the "### Human:"
    marker, followed by an open "### Assistant:" marker with no response.
    """
    title_body = example['title_body']
    return f"### Human: {title_body}\n ### Assistant:"

In [7]:
# Load the saved dataset and drop every column except the question text and
# the answers with their scores.
ds_RM = datasets.load_from_disk('../../ELI5_dataset/data/RM_non_toxic')
features = list(ds_RM['train'].features)
columns_to_drop = [
    col for col in features
    if col not in ['answers.score', 'answers.text', 'title_body']
]
ds_RM = ds_RM.remove_columns(columns_to_drop)

# Build the preference dataset: truncate each sample to its first two answers,
# rename the prompt column to 'question', derive 'chosen'/'rejected' with
# chosen_rejected, then discard the raw answer columns.
ds_RM_top_2 = (
    ds_RM
    .map(lambda x: {'answers.score': x['answers.score'][:2]})
    .map(lambda x: {'answers.text': x['answers.text'][:2]})
    .rename_columns({'title_body': 'question'})
    .map(chosen_rejected)
    .remove_columns(['answers.score', 'answers.text'])
)

# Training


In [None]:
# Identifiers for the base model and for the LoRA adapter checkpoint.
model_name = 'meta-llama/Llama-2-7b-hf'
peft_model_id = 'dhmeltzer/Llama-2-7b-hf-wiki-no-group-by-length_r_64_alpha_16'

# NOTE(review): this import sits outside the notebook's import cell.
# AutoPeftModelForCausalLM is presumably unavailable under the peft==0.4.0 pin
# used in the install cell - confirm the minimum peft version.
from peft import AutoPeftModelForCausalLM
# Load the adapter checkpoint with default arguments (no dtype or quantization
# options are passed here).
peft_model = AutoPeftModelForCausalLM.from_pretrained(peft_model_id)
# Earlier manual loading path (base model + adapter), kept for reference:
#config = PeftConfig.from_pretrained(peft_model_id)
#
#model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
#model = PeftModel.from_pretrained(model, peft_model_id)
#tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

In [10]:
# Merge the adapter loaded into `peft_model` and keep the result as `model`.
# NOTE(review): `model` is reassigned by the 4-bit loading cell that follows,
# so this cell looks like a leftover experiment - confirm it is still needed.
model = peft_model.merge_and_unload()

In [None]:
# Policy model: load the adapter checkpoint with 4-bit loading and float16.
model = AutoPeftModelForCausalLM.from_pretrained(
        peft_model_id,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        load_in_4bit=True,
    )

# NOTE(review): whether merge_and_unload() supports a model loaded with
# load_in_4bit=True depends on the installed PEFT version - confirm before
# relying on this cell.
model = model.merge_and_unload()

# Turn off the generation cache on the policy model's config.
model.config.use_cache = False

# Reference model: a second, independently loaded copy of the same checkpoint
# with identical loading arguments.
model_ref = AutoPeftModelForCausalLM.from_pretrained(
        peft_model_id,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        load_in_4bit=True,
    )

model_ref = model_ref.merge_and_unload()

# The tokenizer id is hard-coded here; it is the same string as `model_name`
# in the adapter-loading cell.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Display the model object as the cell output to inspect it.
model

In [None]:

output_dir = './model/wiki_SFT_no_gl_DPO'

# Training configuration for the DPO run.
# Fixes relative to the previous version of this cell, which did not parse:
#   * added the missing commas after the epoch count and `logging_dir`;
#   * `num_epochs` is not a TrainingArguments parameter -> `num_train_epochs`;
#   * removed the duplicated `remove_unused_columns` keyword;
#   * `warmup_steps=0.05` passed a fraction to a step-count parameter ->
#     `warmup_ratio=0.05` (warm up over 5% of the training steps).
training_args = TrainingArguments(
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    max_steps=-1,  # a negative value lets num_train_epochs set the run length
    num_train_epochs=3,
    logging_steps=100,
    # Fractional values below 1 are read as a ratio of the total training
    # steps; saving and evaluating share the same interval, which
    # load_best_model_at_end requires.
    save_steps=0.1,
    gradient_accumulation_steps=16,
    gradient_checkpointing=True,
    learning_rate=2e-5,
    evaluation_strategy="steps",
    eval_steps=0.1,
    output_dir=output_dir,
    logging_dir=output_dir + '/logs',
    report_to='wandb',
    #lr_scheduler_type=script_args.lr_scheduler_type,
    warmup_ratio=0.05,
    optim='paged_adamw_32bit',
    bf16=True,
    # Keep every dataset column: the preference columns are not arguments of
    # the model's forward method and would otherwise be dropped.
    remove_unused_columns=False,
    run_name="dpo_llama2_wiki_SFT_no_gl",
    load_best_model_at_end=True,
    save_total_limit=3,
    disable_tqdm=False,
)