In [None]:
# Install third-party packages into the kernel's environment (own cell, run once per environment).
# NOTE(review): versions are unpinned, and `unsloth` / `transformers` (both imported further down)
# are not in this list — confirm they are already provided by the environment.
%pip install datasets trl peft sentencepiece wandb triton

In [None]:
import os

# Directory that is placed at the front of PATH for this kernel process.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
extra_bin_dir = '/home/andy/repos/monorepo/bin'

# Default to '' so an unset PATH does not end up as the literal entry 'None'.
current_path = os.environ.get('PATH', '')

# Drop earlier copies of the directory so re-running this cell does not keep
# growing PATH, then put the directory first so it still takes priority.
other_entries = (
    [entry for entry in current_path.split(os.pathsep) if entry != extra_bin_dir]
    if current_path
    else []
)
os.environ['PATH'] = os.pathsep.join([extra_bin_dir, *other_entries])

In [None]:
import gc
import torch
import re

from transformers import AutoTokenizer, TrainingArguments, AutoModelForCausalLM
from unsloth import FastMistralModel, PatchDPOTrainer
PatchDPOTrainer()
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from trl import DPOTrainer
import wandb

import os  # already imported in the PATH cell; repeated so this cell also runs on its own

# Credentials are read from the environment so real tokens never have to be
# written into (and committed with) the notebook. WANDB_API_KEY and HF_TOKEN
# are the variable names the wandb and huggingface_hub tools use themselves.
# If a variable is unset the '<replace>' placeholder is kept, exactly as
# before, and has to be edited by hand.
wb_token = os.environ.get('WANDB_API_KEY', '<replace>')
hf_token = os.environ.get('HF_TOKEN', '<replace>')
wandb.login(key=wb_token)

# Base model to fine-tune, and the output location handed to TrainingArguments.
model_name = "openchat/openchat-3.5-0106"
new_model = "/home/andy/mnt/drive/trained/openchat-nectar-0.2"

In [None]:
# Formats one Nectar example for DPO; also handles prompts that contain multiple turns
def format_as_chat(example):
    """Convert one Nectar example into a DPO prompt/chosen/rejected record.

    Relies on the module-level ``tokenizer`` and ``eos_token_text`` being
    defined before this function is called.

    Args:
        example: Dataset row with a 'prompt' string (a Human/Assistant
            transcript understood by parse_conversation()) and an 'answers'
            list whose entries carry 'answer' and 'rank' keys.

    Returns:
        dict with "prompt" (the chat-templated conversation, generation prompt
        included), "chosen" and "rejected" (answer text followed by the EOS
        token text).

    Raises:
        ValueError: if there is no rank-1 answer, or neither a rank-3 nor a
            rank-2 answer to use as the rejected sample.
    """
    messages = parse_conversation(example['prompt'])
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # in the Nectar dataset, each example has an 'answers' list of at most 7 answers
    # each 'answer' has 'answer', 'model', and 'rank'
    # 'answer' is the text, 'model' is the model that generated that text, and 'rank' is how gpt4-turbo ranks the answer
    # since DPO simply wants a "chosen" and "rejected", we will pick the top ranked answer as "chosen",
    # and the 3rd ranking answer as "rejected". We skip the 2nd to improve the signal between "good" and "bad" (in theory).
    answers = example['answers']

    chosen = find_entry_with_rank(answers, 1)
    if chosen is None:
        raise ValueError('Expected an answer with rank 1 to exist')

    # Prefer rank 3; fall back to rank 2 when no rank-3 answer is present.
    # Looking the rank up directly (instead of branching on len(answers))
    # also covers lists whose ranks are not contiguous.
    rejected = find_entry_with_rank(answers, 3)
    if rejected is None:
        rejected = find_entry_with_rank(answers, 2)
    if rejected is None:
        # An explicit raise: `assert(False, msg)` asserts a non-empty tuple and never fires.
        raise ValueError('Expected at least a rank 2 or 3 to exist')

    return {
        "prompt": prompt,
        "chosen": chosen['answer'] + eos_token_text,
        # question: should the rejected answer exclude the eos token, since we *do* want to highly value the eos token?
        "rejected": rejected['answer'] + eos_token_text,
    }

def parse_conversation(text):
    """Split a Human/Assistant transcript into a list of chat messages.

    Every turn becomes ``{"role": "user" | "assistant", "content": ...}``.
    The transcript must end with an empty Assistant turn; that turn is
    checked and then left out of the result, because the chat template adds
    the assistant generation prompt back later.

    Raises:
        IndexError: if no turn is found in ``text``.
        AssertionError: if the last turn is not an empty Assistant turn.
    """
    # '\n\nHuman: ' or '\n\nAssistant: ' followed by the turn text, captured lazily
    # up to the next speaker marker or the end of the string.
    pattern = r"\n\n(Human|Assistant): (.*?)(?=\n\n(Human|Assistant): |\Z)"
    chat_roles = {"Human": "user", "Assistant": "assistant"}

    # findall yields 3-tuples; the third item is the lookahead's capture group and is unused.
    turns = [
        {"role": chat_roles[speaker], "content": body}
        for speaker, body, _following in re.findall(pattern, text, re.DOTALL)
    ]

    trailing = turns[-1]
    assert trailing["role"] == "assistant"
    assert len(trailing["content"]) == 0

    # Everything except the final, empty assistant turn.
    return turns[:-1]

def find_entry_with_rank(entries, rank):
    """Return the first entry whose 'rank' equals ``rank``, or None if no entry matches."""
    for candidate in entries:
        if candidate['rank'] == rank:
            return candidate
    return None

def _has_rank_one_answer(row):
    """True when at least one of the row's answers carries rank 1."""
    return any(answer['rank'] == 1 for answer in row['answers'])


# Training split of Nectar; remember its columns so they can be dropped after formatting
dataset = load_dataset("berkeley-nest/Nectar")['train']
original_columns = dataset.column_names

# Tokenizer: the EOS token doubles as the padding token, padding goes on the left
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
eos_token_text = tokenizer.eos_token
print(f'eos token: {eos_token_text}')

# Keep only rows that have a rank-1 answer (weird: some entries don't have one?),
# then turn every remaining row into a prompt/chosen/rejected record.
dataset = (
    dataset
    .filter(_has_rank_one_answer)
    .map(format_as_chat, remove_columns=original_columns)
)

# Show the first formatted sample
dataset[0]

In [None]:
# Settings passed to FastMistralModel.from_pretrained() below.
max_seq_length = 2048 # Supports automatic RoPE Scaling, so choose any number.
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# LoRA configuration
# Not used with Unsloth (kept for reference; the LoRA settings now go to get_peft_model() below)
# peft_config = LoraConfig(
#     r=16,
#     lora_alpha=16,
#     lora_dropout=0.05,
#     bias="none",
#     task_type="CAUSAL_LM",
#     target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
# )

# Before unsloth:
# Model to fine-tune
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     torch_dtype=torch.float16,
#     load_in_4bit=True
# )
# model.config.use_cache = False

# With Unsloth:
# The second return value is discarded; the tokenizer passed to the trainer
# is the one created with AutoTokenizer in the dataset cell.
model, _ = FastMistralModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)


# No separate reference model is loaded; both earlier variants are kept below for reference only.
# Before Unsloth:
# Reference model
# ref_model, _ = FastMistralModel.from_pretrained(
#     model_name,
#     torch_dtype=torch.float16,
#     load_in_4bit=True
# )
# With Unsloth:
# ref_model, _ = FastMistralModel.from_pretrained(
#     model_name = model_name,
#     max_seq_length = max_seq_length,
#     dtype = dtype,
#     load_in_4bit = load_in_4bit,
# )

# Rebind `model` to the LoRA-wrapped version (r=16, lora_alpha=16) targeting the
# projection modules listed in target_modules; random_state fixes the seed passed to Unsloth.
model = FastMistralModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Dropout = 0 is currently optimized
    bias = "none",    # Bias = "none" is currently optimized
    use_gradient_checkpointing = True,
    random_state = 3407,
)

# Training arguments
# Pick the mixed-precision mode the GPU actually supports instead of hard-coding
# bf16: the model is loaded with dtype auto-detection (dtype = None), and GPUs
# without bfloat16 support (e.g. T4 / V100) need fp16 here instead.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 examples per device per optimizer step
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=500,
    save_strategy="no",  # no intermediate checkpoints are written during training
    logging_steps=1,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    # warmup_steps=100,
    warmup_ratio=0.1,
    fp16=not use_bf16,
    bf16=use_bf16,
    report_to="wandb",
)

# Create DPO trainer
# Trains the LoRA-wrapped `model` on the prompt/chosen/rejected records in `dataset`.
# NOTE(review): no ref_model is passed. Presumably TRL derives the reference
# behaviour from the same PEFT model with the adapter disabled — confirm against
# the TRL DPOTrainer documentation for the installed version.
dpo_trainer = DPOTrainer(
    model,
    # None, # ref_model can be none??
    # ref_model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    # peft_config=peft_config, # not used with Unsloth
    beta=0.1,
    # NOTE(review): max_prompt_length / max_length are presumably token limits for the
    # prompt and for prompt + answer; both are below max_seq_length (2048) — confirm.
    max_prompt_length=1024,
    max_length=1536,
)

# Fine-tune model with DPO
dpo_trainer.train()

In [None]:
# NOTE(review): the save / merge steps below are commented out and TrainingArguments
# uses save_strategy="no", so nothing in this notebook writes the trained weights to
# disk. The live code at the bottom presumably relies on a merged model that already
# exists in a local 'openchat-nectar-0.2' directory from an earlier run — confirm
# before doing a fresh Restart & Run All.

# # Save artifacts
# dpo_trainer.model.save_pretrained("final_checkpoint")
# tokenizer.save_pretrained("final_checkpoint")

# # Flush memory
# del dpo_trainer, model #, ref_model
# gc.collect()
# torch.cuda.empty_cache()

# # Reload model in FP16 (instead of NF4)
# base_model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     return_dict=True,
#     torch_dtype=torch.float16,
# )
# tokenizer = AutoTokenizer.from_pretrained(model_name)

# # Merge base model with the adapter
# model = PeftModel.from_pretrained(base_model, "final_checkpoint")
# model = model.merge_and_unload()

# # Save model and tokenizer
# model.save_pretrained(new_model)
# tokenizer.save_pretrained(new_model)

# Load the model and tokenizer identified by `name`. This rebinds `model` and
# `tokenizer`, replacing the objects used for training above.
# NOTE(review): `name` is a relative identifier, whereas `new_model` (the path used by
# the commented-out save step) is absolute — verify the working directory, or that
# `name` resolves to the intended model.
name = 'openchat-nectar-0.2'
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

# Push them to the HF Hub
model.push_to_hub(name, use_temp_dir=False, token=hf_token)
tokenizer.push_to_hub(name, use_temp_dir=False, token=hf_token)