In [None]:
!pip install git+https://github.com/huggingface/trl flash-attn transformers galore-torch --no-build-isolation --upgrade

In [None]:
import datasets
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import ModelConfig, ORPOConfig, ORPOTrainer, ORPOConfig
from bitsandbytes.optim import PagedAdamW8bit
from torch.optim.lr_scheduler import ExponentialLR
import torch

In [None]:
# Load the training split of the German preference dataset.
ds = datasets.load_dataset("flozi00/german-capy-dpo", split="train")

# Column shuffle, applied in order: the original "prompt" column is moved aside
# to "prompt_list", then the "chat" column takes over the name "prompt".
# NOTE(review): presumably "chat" holds the already-formatted prompt text the
# trainer should consume — confirm against the dataset card.
for old_name, new_name in (("prompt", "prompt_list"), ("chat", "prompt")):
    ds = ds.rename_column(old_name, new_name)

In [None]:
# Single source of truth for the checkpoint used by both model and tokenizer.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the weights in bfloat16 and request the FlashAttention-2 attention implementation.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    low_cpu_mem_usage=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# If the tokenizer has no padding token, reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Learning rate passed to ORPOConfig below.
LR = 0.0001

# Training configuration for the ORPO run.
args = ORPOConfig(
    # Output location and Hub publishing (private repository).
    output_dir="mistral-instruct-german-orpo",
    push_to_hub=True,
    hub_private_repo=True,
    # Batch layout and logging cadence.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    logging_steps=10,
    # Length limits for the prompt and for the full sequence
    # (presumably measured in tokens — TODO confirm against the TRL docs).
    max_prompt_length=5000,
    max_length=6000,
    # Memory / precision settings.
    gradient_checkpointing=True,
    dataloader_pin_memory=False,
    bf16=True,
    # Optimizer: layer-wise GaLore AdamW, restricted to modules matched by "attn" and "mlp".
    learning_rate=LR,
    optim="galore_adamw_layerwise",
    optim_target_modules=["attn", "mlp"],
)

In [None]:
# Assemble the ORPO trainer from the model, tokenizer, dataset and config built above.
# NOTE(review): eval_dataset is the very same `ds` object as train_dataset, so any
# evaluation would be measured on the training data — consider a held-out split.
trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    train_dataset=ds,
    eval_dataset=ds,
)

# Run the training loop, then write the resulting model to the configured output directory.
trainer.train()
trainer.save_model(output_dir=args.output_dir)