In [1]:
import dotenv
# Load key/value pairs from a local .env file into the process environment before any
# other library is imported (the recorded run returned True).
# NOTE(review): presumably this supplies the Hugging Face / W&B credentials used later — confirm.
dotenv.load_dotenv()

True

In [2]:
import yaml
from datasets import Dataset
from trl import KTOTrainer, KTOConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig

[2024-06-15 15:38:29,345] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status


In [3]:
from typing import Dict, Union, Any, Tuple
import torch
from torch import nn
import warnings
from contextlib import nullcontext
from transformers import PreTrainedModel
class MyKTOTrainer(KTOTrainer):
    """`KTOTrainer` whose `compute_loss` tolerates a loss that is detached from autograd.

    The override mirrors the parent implementation and adds one step: when the loss
    returned by `get_batch_loss_metrics` does not require grad, `requires_grad` is
    forced on so that the subsequent `backward()` call does not raise.

    That workaround only silences the error. A loss without a graph cannot propagate
    gradients to any parameter, so a warning is emitted whenever the workaround is
    actually needed instead of letting training proceed silently as a no-op.
    """

    def compute_loss(
        self,
        model: Union[PreTrainedModel, nn.Module],
        inputs: Dict[str, Union[torch.Tensor, Any]],
        return_outputs=False,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, Dict[str, torch.Tensor]]]:
        """Compute the batch loss and record its metrics under the "train" split.

        Args:
            model: The model passed through to `get_batch_loss_metrics`.
            inputs: The collated batch passed through to `get_batch_loss_metrics`.
            return_outputs: When true, return `(loss, metrics)` instead of just `loss`.

        Returns:
            The loss tensor moved to `self.args.device`, optionally paired with the
            metrics dict.
        """
        if not self.use_dpo_data_collator:
            warnings.warn(
                "compute_loss is only implemented for DPODataCollatorWithPadding, and you passed a datacollator that is different than "
                "DPODataCollatorWithPadding - you might see unexpected behavior. Alternatively, you can implement your own prediction_step method if you are using a custom data collator"
            )
        # Autocast is only used when the PEFT model was cast to bf16 by the trainer.
        compute_loss_context_manager = torch.cuda.amp.autocast if self._peft_has_been_casted_to_bf16 else nullcontext

        with compute_loss_context_manager():
            loss, metrics = self.get_batch_loss_metrics(model, inputs)

        # Make sure to move the loss to the device the original accumulating loss is at back in the `Trainer` class:
        loss = loss.to(self.args.device)
        # force log the metrics
        if self.accelerator.is_main_process:
            self.store_metrics(metrics, train_eval="train")

        # Deviation from the parent implementation: keep backward() from raising when the
        # loss is detached. Forcing the flag on a tensor that already requires grad would
        # be a no-op, so it is only applied (and reported) in the detached case.
        if not loss.requires_grad:
            if torch.is_grad_enabled():
                warnings.warn(
                    "compute_loss produced a loss that is detached from the autograd graph. "
                    "Forcing requires_grad=True only silences the backward() error: no model "
                    "parameter will receive a gradient from this step, so the weights will not "
                    "change. Check that the model has trainable parameters (e.g. the LoRA "
                    "adapter weights) and that the forward pass runs with gradients enabled."
                )
            loss.requires_grad_(True)
        if return_outputs:
            return (loss, metrics)
        return loss

In [4]:
# Base model to fine-tune and the directory the trainer writes its output to.
model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'
output_path = 'kto_output'

# Read the examples inside a context manager so the file handle is closed as soon as
# parsing finishes (a bare `open(...)` passed to `safe_load` is never closed explicitly).
with open('checkpoint_kto_dataset_python_cot.yaml', encoding='utf-8') as checkpoint_file:
    checkpoint = yaml.safe_load(checkpoint_file)

# The YAML file must provide the keys 'prompts', 'completions' and 'labels'; they are
# mapped onto the 'prompt' / 'completion' / 'label' columns of the dataset.
# NOTE(review): presumably three parallel, equal-length lists — confirm against the file.
dataset = Dataset.from_dict(
    {
        'prompt': checkpoint['prompts'],
        'completion': checkpoint['completions'],
        'label': checkpoint['labels'],
    }
)

In [5]:
# Seed for the train/eval partition so that a fresh kernel reproduces the same split.
SPLIT_SEED = 42

# LoRA adapter settings: rank 16, alpha 32, 5% dropout, no bias terms, causal-LM task.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Load the base weights in 4-bit NF4 with double quantization; compute runs in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype='bfloat16',
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fall back to the EOS token for padding when the tokenizer does not define a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Hold out 10% of the examples for evaluation. The split shuffles, so it is seeded:
# without a seed every run trains and evaluates on a different partition.
split = dataset.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_dataset = split['train']
test_dataset = split['test']




`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Build the trainer. No reference model is supplied (ref_model=None); the LoRA settings
# are handed over through `peft_config`.
trainer = MyKTOTrainer(
    model=model,
    ref_model=None,
    args=KTOConfig(
        # Objective.
        loss_type="bco",
        # Where checkpoints and logs are written.
        output_dir=output_path,
        # Batching: 8 examples per device, accumulated over 8 steps, in bf16.
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=8,
        bf16=True,
        # Schedule: 10 epochs with 20 warm-up steps.
        num_train_epochs=10,
        warmup_steps=20,
        # Log, evaluate and save on the same 20-step cadence.
        logging_strategy="steps",
        logging_steps=20,
        do_eval=True,
        evaluation_strategy="steps",
        eval_steps=20,
        save_strategy="steps",
        save_steps=20,
        load_best_model_at_end=True,
    ),
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
)



Tokenizing train dataset:   0%|          | 0/633 [00:00<?, ? examples/s]

Extracting KL train dataset:   0%|          | 0/633 [00:00<?, ? examples/s]

Processing tokenized train dataset:   0%|          | 0/633 [00:00<?, ? examples/s]

Processing tokenized train KL dataset:   0%|          | 0/633 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/71 [00:00<?, ? examples/s]

Extracting eval KL dataset:   0%|          | 0/71 [00:00<?, ? examples/s]

Processing tokenized eval dataset:   0%|          | 0/71 [00:00<?, ? examples/s]

Processing tokenized eval KL dataset:   0%|          | 0/71 [00:00<?, ? examples/s]

Filtering desirable examples:   0%|          | 0/633 [00:00<?, ? examples/s]

Filtering undesirable examples:   0%|          | 0/633 [00:00<?, ? examples/s]

                        You have different amounts of desirable/positive and undesirable/negative examples but the
                        weights on the desirable and undesirable losses don't seem to be in an ideal range. Based
                        on your data, we recommend EITHER desirable_weight in [1.33, 1.77]
                        or undesirable_weight in [0.57, 0.75] (but NOT BOTH).
                        See the documentation on how to optimally set these weights.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [7]:
# Run the training loop; the returned TrainOutput is displayed as the cell result.
# NOTE(review): in the recorded run below, training and validation loss stay at 0.6931
# and rewards/KL stay at 0.0 at every logged step, so the adapter weights do not appear
# to be updating — investigate before trusting the resulting checkpoints.
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mchengpong1127[0m. Use [1m`wandb login --relogin`[0m to force relogin


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Rewards/chosen,Logps/chosen,Rewards/rejected,Logps/rejected,Rewards/margins,Kl
20,0.6931,0.693147,0.0,-183.387812,0.0,-250.126445,0.0,0.0
40,0.6931,0.693147,0.0,-183.387812,0.0,-250.126445,0.0,0.0
60,0.6931,0.693147,0.0,-183.387812,0.0,-250.126445,0.0,0.0
80,0.6931,0.693147,0.0,-183.387812,0.0,-250.126445,0.0,0.0
100,0.6931,0.693147,0.0,-183.387812,0.0,-250.126445,0.0,0.0



Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B-Instruct is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B-Instruct.

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B-Instruct is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B-Instruct.

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B-Instruct is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B-Instruct.

Cannot access gated repo for url 

TrainOutput(global_step=110, training_loss=0.6931463241577148, metrics={'train_runtime': 4782.0291, 'train_samples_per_second': 1.51, 'train_steps_per_second': 0.023, 'total_flos': 0.0, 'train_loss': 0.6931463241577148, 'epoch': 9.67})