In [None]:
!pip install -q unsloth bitsandbytes datasets transformers trl

FileNotFoundError: [Errno 2] No such file or directory: '/content/oov_counts_viIPA.txt'

In [None]:
%%writefile cpotrain.py

from unsloth import FastLanguageModel
from trl import CPOConfig, CPOTrainer
import os
import torch
import wandb
import pandas as pd
from datasets import load_dataset, Dataset

def prepare_dataset(ds_path):
    """Load the CSV file at ``ds_path`` and wrap it in a ``datasets.Dataset``.

    Args:
        ds_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV
            file holding the training examples.

    Returns:
        A ``Dataset`` built from the parsed DataFrame.
    """
    return Dataset.from_pandas(pd.read_csv(ds_path))

class Args:
    """Static configuration for the CPO fine-tuning run.

    Every setting is a plain class attribute, so values can be read from
    the class itself or from an instance.
    """

    # --- model and data locations ---
    model_name = ""
    dataset_path = "/kaggle/input/cpo-data/cpo_ds.csv"
    output_dir = "/kaggle/working/"

    # --- device and numeric precision ---
    device="cuda:0"
    torch_dtype = "bf16"  # string alias: "fp32", "fp16" or "bf16"
    bf16 = 1  # 0/1 flag, converted with bool() where it is consumed
    fp16 = 0  # 0/1 flag, converted with bool() where it is consumed

    # --- LoRA adapter hyperparameters ---
    lora_r = 32
    lora_alpha = 64
    lora_dropout = 0.05

    # --- optimisation schedule ---
    optim = "adamw_8bit"
    learning_rate = 1e-5
    weight_decay = 0.01
    lr_scheduler_type = "cosine"
    warmup_ratio = 0.1
    num_train_epochs = 2
    per_device_train_batch_size = 2
    gradient_accumulation_steps = 2

    # --- logging, checkpointing and evaluation ---
    logging_steps = 50
    saving_steps = 200
    report_to = "none"
    do_eval = False


def _resolve_torch_dtype(dtype):
    """Translate a dtype alias ('fp32', 'fp16', 'bf16') into a ``torch.dtype``.

    Values that are not strings are returned unchanged so an already-resolved
    ``torch.dtype`` can be passed through.

    Raises:
        ValueError: If ``dtype`` is a string that is not a known alias.
    """
    if not isinstance(dtype, str):
        return dtype
    aliases = {
        "fp32": torch.float32,
        "fp16": torch.float16,
        "bf16": torch.bfloat16,
    }
    if dtype not in aliases:
        raise ValueError(
            f"Unsupported torch_dtype {dtype!r}; expected one of {sorted(aliases)}"
        )
    return aliases[dtype]


def main():
    """Fine-tune a 4-bit model with LoRA adapters using TRL's ``CPOTrainer``.

    Steps: read the settings from ``Args``, load the CSV dataset, load the
    base model in 4-bit, attach LoRA adapters, train, and save the adapter
    weights to ``Args.output_dir``.

    Raises:
        ValueError: If ``Args.model_name`` is empty or ``Args.torch_dtype``
            is an unknown alias.
    """
    args = Args()

    # Fail before any expensive work instead of deep inside the model loader.
    if not args.model_name:
        raise ValueError(
            "Args.model_name is empty; set it to a model id or local path."
        )

    if torch.cuda.is_available():
        gpu_count = torch.cuda.device_count()
    else:
        args.device = "cpu"
        gpu_count = 0

    args.torch_dtype = _resolve_torch_dtype(args.torch_dtype)

    # Experiment tracking is off (report_to == "none"). If Weights & Biases is
    # enabled later, authenticate through the WANDB_API_KEY environment
    # variable; never hard-code the API key in this file.

    # Settings are read from the `args` instance throughout because `device`
    # and `torch_dtype` are overridden on the instance above.
    train_ds = prepare_dataset(args.dataset_path)

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model_name,
        load_in_4bit=True,
        device_map=args.device,
        dtype=args.torch_dtype,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        # Previously declared in Args but never forwarded to the adapter.
        lora_dropout=args.lora_dropout,
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        use_gradient_checkpointing="unsloth",
        bias="none",
    )

    model.print_trainable_parameters()

    # device_map above already requests this device; the explicit move is
    # kept from the original script.
    model = model.to(args.device)
    model.gradient_checkpointing_enable(
        gradient_checkpointing_kwargs={"use_reentrant": False}
    )

    cpo_args = CPOConfig(
        # Write checkpoints to the configured directory, not the library default.
        output_dir=args.output_dir,
        learning_rate=args.learning_rate,
        warmup_ratio=args.warmup_ratio,
        num_train_epochs=args.num_train_epochs,
        per_device_train_batch_size=args.per_device_train_batch_size,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        logging_steps=args.logging_steps,
        save_strategy="steps",
        save_steps=args.saving_steps,
        bf16=bool(args.bf16),
        fp16=bool(args.fp16),
        weight_decay=args.weight_decay,
        optim=args.optim,
        do_eval=args.do_eval,
        report_to=args.report_to,
        lr_scheduler_type=args.lr_scheduler_type,
        ddp_find_unused_parameters=False if gpu_count > 1 else None,
    )

    cpo_trainer = CPOTrainer(
        model=model,
        train_dataset=train_ds,
        args=cpo_args,
        processing_class=tokenizer,
    )

    cpo_trainer.train()
    # Args defines `output_dir`, not `save_dir`; the old attribute name raised
    # AttributeError after training, so the adapter was never saved.
    model.save_pretrained(args.output_dir)
    wandb.finish()


# Start training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


In [None]:
!torchrun cpotrain.py