In [1]:
import os
import gc
import torch

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import bitsandbytes as bnb
import wandb
from huggingface_hub import hf_hub_download
from huggingface_hub import login
login("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")


os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/puttview/.cache/huggingface/token
Login successful


In [2]:
model_name = "berkouille/mistral_merged_model_sft4"
new_model = "assistant_DPO_4_v1"

dataset = load_dataset("Intel/orca_dpo_pairs")['train']
dataset = dataset.select(range(2000))
dataset[1]

{'system': 'You are an AI assistant. You will be given a task. You must generate a detailed and long answer.',
 'question': 'Generate an approximately fifteen-word sentence that describes all this data: Midsummer House eatType restaurant; Midsummer House food Chinese; Midsummer House priceRange moderate; Midsummer House customer rating 3 out of 5; Midsummer House near All Bar One',
 'chosen': 'Midsummer House is a moderately priced Chinese restaurant with a 3/5 customer rating, located near All Bar One.',
 'rejected': ' Sure! Here\'s a sentence that describes all the data you provided:\n\n"Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes."'}

In [3]:
def chatml_format(example):
    # Format system
    system = ""
    if len(example['system']) > 0:
        system = "<|im_start|>system\n" + example['system'] + "<|im_end|>\n"

    # Format instruction
    prompt = "<|im_start|>user\n" + example['question'] + "<|im_end|>\n<|im_start|>assistant\n"

    # Format chosen answer
    chosen = example['chosen'] + "<|im_end|>\n"

    # Format rejected answer
    rejected = example['rejected'] + "<|im_end|>\n"

    return {
        "prompt": system + prompt,
        "chosen": chosen,
        "rejected": rejected,
    }

# Load dataset
#dataset = load_dataset("Intel/orca_dpo_pairs")['train']

# Save columns
original_columns = dataset.column_names

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Format dataset
dataset = dataset.map(
    chatml_format,
    remove_columns=original_columns
)

# Print sample
dataset[0]

{'chosen': '[\n  ["AFC Ajax (amateurs)", "has ground", "Sportpark De Toekomst"],\n  ["Ajax Youth Academy", "plays at", "Sportpark De Toekomst"]\n]<|im_end|>\n',
 'rejected': " Sure, I'd be happy to help! Here are the RDF triplets for the input sentence:\n\n[AFC Ajax (amateurs), hasGround, Sportpark De Toekomst]\n[Ajax Youth Academy, playsAt, Sportpark De Toekomst]\n\nExplanation:\n\n* AFC Ajax (amateurs) is the subject of the first triplet, and hasGround is the predicate that describes the relationship between AFC Ajax (amateurs) and Sportpark De Toekomst.\n* Ajax Youth Academy is the subject of the second triplet, and playsAt is the predicate that describes the relationship between Ajax Youth Academy and Sportpark De Toekomst.\n\nNote that there may be other possible RDF triplets that could be derived from the input sentence, but the above triplets capture the main relationships present in the sentence.<|im_end|>\n",
 'prompt': "<|im_start|>user\nYou will be given a definition of a ta

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)
 
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    use_cache=False,
    #attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    #quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left' # to prevent errors with FA
tokenizer.truncation_side = 'left' # to prevent cutting off last generation

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



In [None]:
!pip install ninja packaging
!MAX_JOBS=4 pip install flash-attn --no-build-isolation

In [7]:
# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)


In [8]:

# Training arguments
training_args = TrainingArguments(
    #num_train_epochs=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    learning_rate=5e-5,                     
    lr_scheduler_type="cosine",
    max_steps=55,
    save_strategy="no",
    logging_steps=5,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    warmup_steps=20,                           
    bf16=True, 
    tf32=True,   
    report_to="wandb", #cosmis fire
)

# Create DPO trainer
dpo_trainer = DPOTrainer(
    model,
    ref_model=None, # set to none since we use peft
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.5,
    loss_type= 'sigmoid' ,   
    max_prompt_length=256,
    max_length=512,
)

# Fine-tune model with DPO
dpo_trainer.train()

#cosmis fire

dpo_trainer.model.save_pretrained("final_checkpoint")
tokenizer.save_pretrained("final_checkpoint")

# Flush memory
del dpo_trainer, model
gc.collect()
torch.cuda.empty_cache()

# Reload model in FP16 (instead of NF4)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    return_dict=True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Merge base model with the adapter
model = PeftModel.from_pretrained(base_model, "final_checkpoint")
model = model.merge_and_unload()

# Save model and tokenizer
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# Push them to the HF Hub
model.push_to_hub(new_model, use_temp_dir=False, token="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")
tokenizer.push_to_hub(new_model, use_temp_dir=False, token="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")



Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


NotImplementedError: Cannot copy out of meta tensor; no data!

: 