In [None]:
!pip install git+https://github.com/huggingface/trl git+https://github.com/huggingface/peft galore-torch bitsandbytes transformers --upgrade --no-deps

In [None]:
import datasets
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import ModelConfig, ORPOConfig, ORPOTrainer, ORPOConfig
import torch
from peft import LoraConfig

In [None]:
# Pull the "train" split of each preference dataset from the Hub, in order.
ds1, ds2, ds3 = (
    datasets.load_dataset(repo_id, split="train")
    for repo_id in (
        "flozi00/german-capy-dpo",
        "flozi00/mixdpo-german",
        "flozi00/dpo-mix-formatted",
    )
)

# Stack the three sources into one training corpus.
ds = datasets.concatenate_datasets([ds1, ds2, ds3])

# Move the existing "prompt" column out of the way first, then expose the
# "chat" column under the "prompt" name.
# NOTE(review): presumably "chat" holds the already-templated prompt text that
# the trainer should consume -- confirm against the dataset cards.
ds = ds.rename_column("prompt", "prompt_list")
ds = ds.rename_column("chat", "prompt")

In [None]:
# Single source of truth for the base checkpoint (used for model and tokenizer).
BASE_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# LoRA adapters on every linear layer. `layer_replication` rebuilds the decoder
# stack from the listed layer ranges, so overlapping ranges duplicate layers.
# NOTE(review): PEFT documents each pair as a half-open [start, end) range of
# original layer indices -- verify against the installed PEFT version.
peft_config = LoraConfig(
    target_modules="all-linear",
    layer_replication=[[0, 16], [8, 24], [16, 32]],
)

# 4-bit bitsandbytes quantization for the base weights.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    # Fix: this config used to be constructed but never handed to the loader,
    # so the model was silently loaded unquantized in bf16.
    # NOTE(review): if full-precision bf16 training was actually intended,
    # drop this argument and the BitsAndBytesConfig above instead. Also confirm
    # that 4-bit loading works together with `layer_replication` on the
    # installed PEFT/bitsandbytes versions.
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2",
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
# Batching needs a pad token; fall back to EOS when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Enable activation checkpointing directly on the model, using the
# non-reentrant implementation.
model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False})

In [None]:
# Peak learning rate handed to the optimizer.
LR = 0.0001

args = ORPOConfig(
    # --- output / Hub ---
    output_dir="mistral-instruct-german-orpo",
    push_to_hub=True,
    hub_private_repo=True,
    # --- batching: 2 sequences x 32 accumulation steps per optimizer update ---
    per_device_train_batch_size=2,
    gradient_accumulation_steps=32,
    dataloader_pin_memory=False,
    # --- sequence length caps (prompt alone, and prompt + completion) ---
    max_prompt_length=5000,
    max_length=6000,
    # --- optimisation ---
    learning_rate=LR,
    optim="lion_8bit",
    # NOTE(review): the transformers docs describe optim_target_modules as a
    # GaLore-only setting, so it is presumably ignored with "lion_8bit" --
    # confirm, then either switch optimizer or drop it.
    optim_target_modules=["attn", "mlp"],
    bf16=True,
    # Left off here because checkpointing is switched on directly on the model
    # via model.gradient_checkpointing_enable(...).
    gradient_checkpointing=False,
    # --- logging ---
    logging_steps=1,
)

In [None]:
# Build the ORPO trainer. The LoRA settings are passed via `peft_config`
# rather than by wrapping the model beforehand.
trainer = ORPOTrainer(
    model=model,
    args=args,
    train_dataset=ds,
    # Fix: `eval_dataset=ds` used to be passed here, i.e. the training set
    # itself. Any metric computed on it would be a training-set score, and no
    # evaluation strategy is configured in `args`, so the argument is dropped.
    # NOTE(review): to get real validation numbers, hold out a split (e.g. with
    # `ds.train_test_split(...)`) and enable an evaluation strategy in `args`.
    tokenizer=tokenizer,
    peft_config=peft_config,
)

# Run training, then save the trained model to the configured output directory.
trainer.train()
trainer.save_model(args.output_dir)