In [1]:
# pip install --upgrade trl 

In [1]:
# Importing packages
import os
import gc
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training, AutoPeftModelForCausalLM
from trl import DPOTrainer, DPOConfig
import bitsandbytes as bnb
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from datasets import load_dataset
dataset = load_dataset("Arnab13/lima_preferred")['train']

In [10]:
# Define model names and tokens
peft_model_name = "mistralai/Mistral-7B-Instruct-v0.2" # The model obtained after the SFT step
new_model = "Mistral-DPO-lima" 

In [11]:
# Tokenizer setup
tokenizer = AutoTokenizer.from_pretrained(peft_model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

In [12]:
# LoRA configuration
peft_config = LoraConfig(
    r=8,
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'v_proj', 'q_proj', 'dense']
)

In [13]:
# Load the base model with BitsAndBytes configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

In [14]:
# Load the entire model on the GPU 0
device_map = {"": 0}

In [15]:
# Loads model from hugging face and device mapping
model = AutoModelForCausalLM.from_pretrained(
    peft_model_name,
    quantization_config=bnb_config,
    device_map=device_map
)

model.config.use_cache = False

#Configure the pad token in the model
model.config.pad_token_id = tokenizer.pad_token_id

Loading checkpoint shards: 100%|██████████| 3/3 [00:07<00:00,  2.36s/it]
You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [16]:
# Training arguments
training_args = DPOConfig(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=50, # we set up the max_steps to 50, due to free GPU useage
    save_strategy="no",
    logging_steps=1,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    warmup_steps=5,
)

In [17]:
# Create DPO trainer
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1, 
    max_prompt_length=512,
    max_length=1024,
)


Deprecated positional argument(s) used in DPOTrainer, please use the DPOConfig to set these arguments instead.
Map: 100%|██████████| 50/50 [00:00<00:00, 400.95 examples/s]
max_steps is given, it will override any value given in num_train_epochs


In [18]:
gc.collect()
torch.cuda.empty_cache()

In [19]:
# Fine-tune model with DPO
dpo_trainer.train()

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,0.6931
2,0.6931
3,0.6949
4,0.6934
5,0.6971
6,0.6933
7,0.6441
8,0.6201
9,0.5933
10,0.5876


TrainOutput(global_step=50, training_loss=0.3773615771532059, metrics={'train_runtime': 135.2442, 'train_samples_per_second': 2.958, 'train_steps_per_second': 0.37, 'total_flos': 0.0, 'train_loss': 0.3773615771532059, 'epoch': 8.0})

In [20]:
# Save artifacts
dpo_trainer.model.save_pretrained("dpo_model")
tokenizer.save_pretrained("dpo_model")

('dpo_model/tokenizer_config.json',
 'dpo_model/special_tokens_map.json',
 'dpo_model/tokenizer.model',
 'dpo_model/added_tokens.json',
 'dpo_model/tokenizer.json')

In [21]:
# Flush memory
del dpo_trainer, model
gc.collect()
torch.cuda.empty_cache()

In [22]:
# Reload model in FP16 (instead of NF4)
base_model = AutoModelForCausalLM.from_pretrained(
    peft_model_name,
    return_dict=True,
    torch_dtype=torch.float16,
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.78it/s]


In [23]:
tokenizer = AutoTokenizer.from_pretrained(peft_model_name)

In [24]:
# Merge base model with the adapter
model = PeftModel.from_pretrained(base_model, "dpo_model")
model = model.merge_and_unload()

In [25]:
# Save model and tokenizer
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

('Mistral-DPO-lima/tokenizer_config.json',
 'Mistral-DPO-lima/special_tokens_map.json',
 'Mistral-DPO-lima/tokenizer.model',
 'Mistral-DPO-lima/added_tokens.json',
 'Mistral-DPO-lima/tokenizer.json')

In [32]:
model.to("cuda")

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  

In [30]:
import pandas as pd
df = pd.DataFrame(dataset)

In [33]:
responses=[]
for i in tqdm(range(len(df))):
    instruction = df['prompt'].values[i]
    prompt = "[INST] " + instruction +  " [/INST]"
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(inputs, max_new_tokens = 100, temperature=0.9, do_sample=True)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    responses.append(response.split('[/INST] ')[-1])

  0%|          | 0/50 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  2%|▏         | 1/50 [00:04<03:38,  4.46s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  4%|▍         | 2/50 [00:08<03:33,  4.45s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  6%|▌         | 3/50 [00:13<03:28,  4.44s/it]The attention mask and the pad token id were not set. As a consequence, you may obs

In [34]:
df['response'] = responses

In [35]:
df

Unnamed: 0,prompt,chosen,rejected,__index_level_0__,response
0,I'm using wp_nav_menu and am trying to create ...,In the context of the `wp_nav_menu` function i...,In the context of the `wp_nav_menu()` function...,31,In the context of the `wp_nav_menu` function i...
1,I'm trying to write a Bash script that will ov...,To overwrite an existing directory with the co...,To overwrite the contents of directory `bar` w...,109,To overwrite the contents of `bar/` with the c...
2,What is likely to happen when you plug two end...,When you connect two ends of a network cable t...,When you connect two ends of a network cable t...,136,When you connect two ends of a network cable t...
3,"What does %~dp0 mean, and how does it work?\nI...",`%~dp0` is a variable used in batch scripts in...,`%~dp0` is a variable modification in batch sc...,88,`%~dp0` is a type of variable used in batch sc...
4,Write the ending. The person to reply to your ...,Once upon a time in the mystical land of Eldam...,"Once upon a time in a mystical land, there was...",918,"Once upon a time in a far-off land, there was ..."
5,I am from Kenya. Can I travel to the Netherlan...,I cannot provide you with definitive answers t...,I cannot provide definitive answers to your sp...,1025,I cannot provide you with definitive answers t...
6,You are stuck as a propaganda writer for North...,"I understand the predicament I'm in, and I wil...","Dear Beloved Comrades,\n\nI stand before you t...",870,"Dear Esteemed Comrades,\n\nI am filled with im..."
7,So I'm pretty far into writing my dystopian no...,It's understandable that you may have concerns...,It is important to consider the potential impa...,318,It is important to consider the potential impa...
8,Did the ancients or other pre-industrial civil...,"Yes, many ancient and pre-industrial civilizat...","Yes, some ancient and pre-industrial civilizat...",261,"Yes, some ancient and pre-industrial civilizat..."
9,Write a 2-3 sentence script for a movie advert...,(Suspenseful music plays in the background) 'I...,Prepare to be terrorized in the chilling new h...,535,Prepare to be scared out of your wits in the c...


In [36]:
model.push_to_hub(new_model, check_pr=True)
tokenizer.push_to_hub(new_model,check_pr=True)


Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s].94G [00:00<?, ?B/s][A


model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s][A[A[A

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s][A[A


model-00002-of-00003.safetensors:   0%|          | 8.19k/5.00G [00:00<51:51:39, 26.8kB/s][A[A[A
model-00001-of-00003.safetensors:   0%|          | 8.19k/4.94G [00:00<55:19:41, 24.8kB/s][A

model-00003-of-00003.safetensors:   0%|          | 8.19k/4.54G [00:00<54:20:25, 23.2kB/s][A[A
model-00001-of-00003.safetensors:   0%|          | 12.4M/4.94G [00:00<04:43, 17.4MB/s]   [A
model-00001-of-00003.safetensors:   0%|          | 16.0M/4.94G [00:01<08:37, 9.53MB/s][A

model-00003-of-00003.safetensors:   0%|          | 12.7M/4.54G [00:01<09:38, 7.82MB/s]   [A[A


model-00002-of-00003.safetensors:   0%|          | 12.7M/5.00G [00:02<14:39, 5.67MB/s]   [A[A[A

model-00003-of-00003.safetensors:   0%|          | 16.0M/4.54G [00:04<2

CommitInfo(commit_url='https://huggingface.co/Arnab13/Mistral-DPO-lima/commit/b22597b1734ec5899ee433a7d7e58422b40b5af2', commit_message='Upload tokenizer', commit_description='', oid='b22597b1734ec5899ee433a7d7e58422b40b5af2', pr_url=None, pr_revision=None, pr_num=None)