In [1]:
# Installations
! pip install torch==2.0.1 transformers peft accelerate trl bitsandbytes optimum auto-gptq --q

In [2]:
pip install datasets==2.15.0 --q

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.
cuml 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.
dask-cuda 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.
dask-cuda 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.
dask-cuda 23.8.0 requires pa

### Hugging Face and WnB secrets and login

In [3]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_hf = user_secrets.get_secret("hf")
secret_value_wb = user_secrets.get_secret("wandb_api")

In [4]:
from huggingface_hub import notebook_login, login
login(secret_value_hf)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
import wandb
! wandb login --relogin $secret_value_wb

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [6]:
import torch
from datasets import Dataset, load_dataset
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, TrainingArguments, AutoModelForCausalLM, GPTQConfig
from trl import DPOTrainer



### Dataset load and dataset preparation

In [7]:
from datasets import load_dataset

dataset = load_dataset("Dahoas/full-hh-rlhf",split='test')

Downloading readme:   0%|          | 0.00/478 [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/930 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/123M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/112052 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/12451 [00:00<?, ? examples/s]

In [8]:
dataset

Dataset({
    features: ['prompt', 'response', 'chosen', 'rejected'],
    num_rows: 12451
})

In [9]:
original_columns = dataset.column_names

def return_prompt_and_responses(samples):
    return {
        "prompt": samples['prompt'],
        "chosen": samples["chosen"],
        "rejected": samples["rejected"],
    }

In [10]:
train_data = dataset.map(return_prompt_and_responses,batched=True,remove_columns=original_columns)

Map:   0%|          | 0/12451 [00:00<?, ? examples/s]

In [11]:
train_data

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 12451
})

### Tokenizer

In [12]:
tokenizer = AutoTokenizer.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/174 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### SFT Models-

1. model - model to be tuned
2. model_ref - reference model to measure the KL Divergence

In [13]:
model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ", 
                                             torch_dtype=torch.float16, 
                                             low_cpu_mem_usage=True, 
                                             quantization_config=GPTQConfig(bits=4, use_exllama=False))

model_ref = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ", 
                                                 torch_dtype=torch.float16, 
                                                 low_cpu_mem_usage=True, 
                                                 quantization_config=GPTQConfig(bits=4, use_exllama=False))

config.json:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


model.safetensors:   0%|          | 0.00/4.16G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/136 [00:00<?, ?B/s]

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


In [14]:
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32002, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (rotary_emb): MistralRotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): MistralMLP(
          (act_fn): SiLU()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=32002, bias=False)
)

In [15]:
train_df = train_data.to_pandas()

In [16]:
train_df.head()

Unnamed: 0,prompt,chosen,rejected
0,\n\nHuman: I've been seeing a lot of slugs out...,Slugs and other mollusks can play an importan...,"Hi there, this is a difficult question as it ..."
1,\n\nHuman: Can you tell me the stops on the B ...,"Certainly, the B Line subway has 23 stations ...","Sure, here are all the stops on the B Line in..."
2,\n\nHuman: Is a firefighter a good occupation?...,What makes a job good?,"By “work on”, I think you’re referring to the..."
3,\n\nHuman: I'm trying to learn how to grow the...,I'm happy to help you with anything else you ...,Pumpkins can grow pretty big in the right con...
4,\n\nHuman: Is there a simple tool that helps p...,What are you talking about?,I think a reasonable answer is “no” -- though...


In [17]:
val_df = train_df.sample(100)

In [18]:
train_data = Dataset.from_pandas(train_df)
val_data = Dataset.from_pandas(val_df)

### Peft Config

In [19]:
peft_config = LoraConfig(
        r=4,
        lora_alpha=6,
        lora_dropout=0.1,
        target_modules=["q_proj", "v_proj"],
        bias="none",
        task_type="CAUSAL_LM",
    )
peft_config.inference_mode = False

In [20]:
model = prepare_model_for_kbit_training(model)
model.config.use_cache=False
model.gradient_checkpointing_enable()
model.config.pretraining_tp=1
model = get_peft_model(model, peft_config)

In [21]:
model_ref = prepare_model_for_kbit_training(model_ref)
model_ref.config.use_cache=False
model_ref.gradient_checkpointing_enable()
model_ref.config.pretraining_tp=1
model_ref = get_peft_model(model_ref, peft_config)

### Training Arguments

In [22]:
training_args = TrainingArguments(
        per_device_train_batch_size=1,
        max_steps=250,
        remove_unused_columns=False,
        gradient_accumulation_steps=1,
        learning_rate=2e-4,
        evaluation_strategy="steps",
        logging_first_step=True,
        logging_steps=10,
        output_dir="mistral-dpo",
        optim="paged_adamw_32bit",
        warmup_steps=2,
        fp16=True,
        push_to_hub=True
    )

### DPO trainer with arguments 

In [23]:
dpo_trainer = DPOTrainer(
        model,
        model_ref,
        args=training_args,
        beta=0.1,
        train_dataset=train_data,
        eval_dataset=val_data,
        tokenizer=tokenizer,
        max_length=512,
        max_target_length=256,
        max_prompt_length=256
    )

### Finetuning the model with DPO

In [24]:
dpo_trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33maritra-slg[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.16.1
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20231221_183010-pk6hfhbj[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mdainty-fire-11[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/aritra-slg/huggingface[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/aritra-slg/huggingface/runs/pk6hfhbj[0m
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen
10,0.6703,0.684152,-0.0001,-0.026769,0.586538,0.026669,-179.52565,-183.906311,-2.429027,-2.471997
20,0.7119,0.675105,0.158416,0.099018,0.576923,0.059398,-178.267792,-182.321136,-2.454169,-2.498805
30,0.647,0.670219,0.356863,0.253994,0.576923,0.102869,-176.718033,-180.336685,-2.488573,-2.530555
40,0.6748,0.671212,0.343887,0.222875,0.528846,0.121012,-177.029221,-180.466446,-2.520648,-2.558065
50,0.6513,0.670684,0.440322,0.283795,0.557692,0.156527,-176.419998,-179.502106,-2.560783,-2.58528
60,0.6103,0.669485,0.68313,0.476878,0.557692,0.206252,-174.489182,-177.074036,-2.571908,-2.593309
70,1.0313,0.672447,0.706175,0.508402,0.557692,0.197773,-174.173935,-176.843582,-2.554307,-2.584262
80,0.6876,0.68041,0.699489,0.51445,0.538462,0.185039,-174.113464,-176.910431,-2.544337,-2.582863
90,0.9661,0.682803,0.711801,0.537588,0.538462,0.174213,-173.882095,-176.787292,-2.547856,-2.584605
100,0.7354,0.675699,0.676524,0.503933,0.557692,0.17259,-174.218643,-177.140076,-2.539931,-2.575778


TrainOutput(global_step=250, training_loss=0.7666875419616699, metrics={'train_runtime': 10540.7094, 'train_samples_per_second': 0.024, 'train_steps_per_second': 0.024, 'total_flos': 0.0, 'train_loss': 0.7666875419616699, 'epoch': 0.02})

### Pushing the model to the hub

In [25]:
dpo_trainer.push_to_hub("aritrasen/mistral-dpo")

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

events.out.tfevents.1703183408.95c7d1f2a3f6.26.0:   0%|          | 0.00/40.0k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/6.83M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.22k [00:00<?, ?B/s]

'https://huggingface.co/aritrasen/mistral-dpo/tree/main/'