In [1]:
# Uncomment and run once on a fresh runtime to install the libraries imported below.
# !pip install -U transformers datasets accelerate peft trl bitsandbytes wandb


In [None]:
# Mount Google Drive at /content/drive so files stored under MyDrive are
# reachable from this runtime (Colab-only; this import is unavailable elsewhere).
from google.colab import drive
drive.mount('/content/drive')

In [4]:
import gc
import os

import torch
import wandb
from datasets import Dataset
# from google.colab import userdata
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format
import pandas as pd

# Authenticate with Weights & Biases without embedding the API key in the notebook.
# The key is read from the WANDB_API_KEY environment variable; on Colab it can be
# populated from the secrets panel first, e.g.
#   os.environ["WANDB_API_KEY"] = userdata.get('wandb')
# When the variable is unset, key=None lets wandb.login() fall back to its own
# credential lookup / interactive prompt.
# NOTE(review): the key that was previously hardcoded here has been exposed and
# should be revoked in the W&B account settings.
wb_token = os.environ.get("WANDB_API_KEY")
wandb.login(key=wb_token)

ModuleNotFoundError: No module named 'wandb'

In [None]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16
attn_implementation

In [None]:
# Checkpoint to fine-tune, and the name the trained model is saved under.
# Alternatives left disabled by the author:
#   base_model = "Qwen/Qwen2-1.5B"  /  new_model = "ORPOQwen/Qwen2-1.5B"
#   model_name = "Qwen/Qwen2-72B-Instruct"
new_model = "ORPOQwen/Qwen2-7B"
base_model = "Qwen/Qwen2-7B"

In [None]:
# bitsandbytes settings for loading the base model in 4-bit (QLoRA-style):
# NF4 quantization with double quantization enabled, computing in the
# half-precision dtype selected for this GPU.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [None]:
# LoRA adapter settings: rank 16, alpha 32, 5% dropout, no bias training,
# applied to the attention projections (q/k/v/o) and the MLP projections
# (gate/up/down) of the causal LM.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=[
        'up_proj',
        'down_proj',
        'gate_proj',
        'k_proj',
        'q_proj',
        'v_proj',
        'o_proj',
    ],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Load the tokenizer published with the base checkpoint named in `base_model`.
tokenizer = AutoTokenizer.from_pretrained(base_model)


In [None]:
# Load the base checkpoint with the 4-bit bitsandbytes configuration and let
# device_map="auto" place it on the available device(s).
# torch_dtype is passed explicitly so the non-quantized modules use the same
# half-precision type as the quantization compute dtype and the selected
# attention backend, instead of whatever the loader defaults to.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch_dtype,
    attn_implementation=attn_implementation,
)


In [None]:
# Configure the model/tokenizer pair for chat-formatted training via TRL's
# setup_chat_format helper (the returned objects replace the originals).
# NOTE(review): some TRL versions reject tokenizers that already define a chat
# template — confirm this call succeeds for the chosen Qwen2 checkpoint.
model, tokenizer = setup_chat_format(model, tokenizer)
# Prepare the quantized model for k-bit (LoRA) training using PEFT's helper;
# this must run after the chat-format setup since it operates on its result.
model = prepare_model_for_kbit_training(model)

In [13]:
# Build a small preference dataset (prompt / chosen / rejected) from the CSV on Drive.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/novelgen/preference.csv')

# Smoke-test subset: only the first rows of the file are kept (DataFrame.head()).
# orient="list" yields {column: [values, ...]}, the layout Dataset.from_dict
# expects, so no per-column post-processing of the dict is needed.
d = df[['prompt', 'chosen', 'rejected']].head().to_dict(orient="list")
dataset = Dataset.from_dict(d)

# Shuffle deterministically and keep at most 5 examples; the size is clamped so
# a CSV with fewer than 5 rows does not make select() raise.
# For a full run, drop .head() above and the .select(...) below.
dataset = dataset.shuffle(seed=42).select(range(min(5, len(dataset))))

In [15]:
def format_chat_template(row):
    """Render the ``chosen`` and ``rejected`` fields of one example as chat text.

    Each field is passed to the module-level ``tokenizer``'s
    ``apply_chat_template`` with ``tokenize=False`` and replaced by the result.
    The ``row`` mapping is modified in place and also returned, which is the
    shape ``Dataset.map`` expects from its callback.

    NOTE(review): ``apply_chat_template`` normally takes a list of
    ``{"role": ..., "content": ...}`` messages — confirm the CSV columns hold
    that structure rather than plain strings.
    """
    for field in ("chosen", "rejected"):
        row[field] = tokenizer.apply_chat_template(row[field], tokenize=False)
    return row

# Apply the chat template to every example, using one worker per CPU core.
dataset = dataset.map(
    format_chat_template,
    num_proc=os.cpu_count(),
)
# Hold out 1% of the examples for evaluation. The split is seeded (same seed as
# the shuffle used when building the dataset) so the train/test partition is
# reproducible across runs.
# NOTE: this cell rebinds `dataset` from a Dataset to a DatasetDict, so it is
# not safe to re-run without re-running the dataset-building cell first.
dataset = dataset.train_test_split(test_size=0.01, seed=42)

2024-07-25 02:59:18.543605: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-25 02:59:19.421414: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Map (num_proc=16):   0%|          | 0/508 [00:05<?, ? examples/s]


NameError: name 'tokenizer' is not defined

In [None]:
# Training hyperparameters for the ORPO run, grouped by concern.
orpo_args = ORPOConfig(
    # ORPO objective and sequence-length limits
    beta=0.1,
    max_length=1024,
    max_prompt_length=512,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_8bit",
    learning_rate=8e-6,
    lr_scheduler_type="linear",
    warmup_steps=10,
    # Batching: 2 examples per device, gradients accumulated over 4 steps
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    # Evaluation cadence (a float eval_steps < 1 is a fraction of total training steps)
    evaluation_strategy="steps",
    eval_steps=0.2,
    # Logging and outputs
    logging_steps=1,
    report_to="wandb",
    output_dir="./results/",
)

# Wire the quantized model, tokenizer, LoRA settings and the train/test splits
# into the ORPO trainer.
trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=orpo_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

# Run training, then save the resulting model under the `new_model` name.
trainer.train()
trainer.save_model(new_model)
