In [1]:
import torch
import sys
sys.path.append('..')
from model.utils import LMHyperParams, SmModel, ModelChoice
from dataset.squad import UltraFeedbackDataModule
from transformers import AutoTokenizer, PreTrainedTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft.tuners.lora.config import LoraConfig
from transformers import TrainingArguments
from trl import DPOTrainer, DPOConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
model_id = "cognitivecomputations/dolphin-2.1-mistral-7b" # replace with your model id

tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(model_id) # type: ignore
data_module = UltraFeedbackDataModule(2, tokenizer, 1024, 10)
data_module.setup("fit")

[32m2024-11-23 17:39:38.958[0m | [1mINFO    [0m | [36mdataset.squad[0m:[36msetup[0m:[36m219[0m - [1mLoading dataset for stage fit[0m
[32m2024-11-23 17:39:40.881[0m | [1mINFO    [0m | [36mdataset.squad[0m:[36msetup[0m:[36m230[0m - [1mProcessing dataset for stage fit, workers: 12, cache dir dataset_caches/ultrafeedback[0m
num_proc must be <= 9. Reducing num_proc to 9 for dataset of size 9.
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1.


In [None]:
model_id = "cognitivecomputations/dolphin-2.1-mistral-7b" # replace with your model id

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    use_cache=False, 
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left' # to prevent errors with FA
tokenizer.truncation_side = 'left' # to prevent cutting off last generation

Loading checkpoint shards: 100%|██████████| 2/2 [00:20<00:00, 10.24s/it]


In [5]:
prompt_length = 1024
max_seq_length = 1512

peft_config = LoraConfig(
    lora_alpha=128,
    lora_dropout=0.05,
    r=256,
    bias="none",
    target_modules="all-linear",
    task_type="CAUSAL_LM",
)

args = DPOConfig(
    output_dir="doplhin-dpo",
    num_train_epochs=1,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    optim="adamw_torch_fused",
    learning_rate=5e-5,
    max_grad_norm=0.3,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    logging_steps=25,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="steps",
    eval_steps=700,
    bf16=True,
    tf32=True,
    push_to_hub=False,
    report_to="tensorboard",
)

dpo_args = {
    "beta": 0.1,  # The beta factor in DPO loss. Higher beta means less divergence
    "loss_type": "sigmoid",  # The loss type for DPO.
}


trainer = DPOTrainer(
    model,
    ref_model=None,  # set to none since we use peft
    peft_config=peft_config,
    args=args,
    train_dataset=data_module.train_dataset,
    eval_dataset=data_module.val_dataset,
    tokenizer=tokenizer,  # type: ignore
    max_length=max_seq_length,
    max_prompt_length=prompt_length,
    beta=dpo_args["beta"],
    loss_type=dpo_args["loss_type"],
)


Deprecated positional argument(s) used in DPOTrainer, please use the DPOConfig to set these arguments instead.
Extracting prompt from train dataset: 100%|██████████| 9/9 [00:00<00:00, 369.29 examples/s]
Applying chat template to train dataset: 100%|██████████| 9/9 [00:00<00:00, 390.69 examples/s]
Extracting prompt from eval dataset: 100%|██████████| 1/1 [00:00<00:00, 77.29 examples/s]
Applying chat template to eval dataset: 100%|██████████| 1/1 [00:00<00:00, 89.00 examples/s]
Tokenizing train dataset: 100%|██████████| 9/9 [00:00<00:00, 260.41 examples/s]
Tokenizing eval dataset: 100%|██████████| 1/1 [00:00<00:00, 79.87 examples/s]


In [11]:
dataloader = data_module.train_dataloader()
first_batch = next(iter(dataloader))
trainer.compute_loss(model, first_batch, "train")

KeyError: 'prompt_input_ids'

In [6]:
trainer.train()

  prompt_input_ids = [torch.tensor(example["prompt_input_ids"]) for example in examples]
  chosen_input_ids = [torch.tensor(example["chosen_input_ids"]) for example in examples]
  rejected_input_ids = [torch.tensor(example["rejected_input_ids"]) for example in examples]
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss


TrainOutput(global_step=1, training_loss=0.6931471824645996, metrics={'train_runtime': 20.0123, 'train_samples_per_second': 0.45, 'train_steps_per_second': 0.05, 'total_flos': 0.0, 'train_loss': 0.6931471824645996, 'epoch': 1.0})