In [1]:
from datasets import load_dataset

dataset = load_dataset("Ksgk-fy/alignment-sft-test01")

In [2]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

model_name = "HuggingFaceH4/zephyr-7b-beta"
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

dataset = load_dataset("Ksgk-fy/alignment-sft-test01", split="train")


def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['prompt'])):
        text = f"### Question: {example['prompt'][i]}\n ### Answer: {example['completion'][i]}"
        output_texts.append(text)
    return output_texts

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [3]:
# from transformers import SFTTrainer
from trl import SFTTrainer


trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
)

# trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [4]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfangyuan-yu18[0m ([33mksgk-hack[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/1134 [00:00<?, ?it/s]

KeyboardInterrupt: 

### Learning
1. BitsandBytes quantization works only with Nvidia GPUs

In [5]:
# bitsandbytes quantize our model to 4-bit

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format
from datasets import load_dataset

dataset = load_dataset("Ksgk-fy/alignment-sft-test01")

model_id = "HuggingFaceH4/zephyr-7b-beta"
bnb_config = BitsAndBytesConfig(
    load_i_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

# mps based device is restricted in the SFT realm with huggingface
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    # attn_implementation="flash_attention_2",
    # torch_dtype=torch.bfloat16,
    # quantization_config=bnb_config

)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right'

model, tokenizer = setup_chat_format(model, tokenizer)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [9]:
dataset['train'].to_json("train_dataset.json", orient="records")
dataset['test'].to_json("test_dataset.json", orient="records")

Creating json from Arrow format:   0%|          | 0/4 [00:00<?, ?ba/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

221153

In [10]:
dataset = load_dataset("json", data_files="train_dataset.json", split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [13]:
dataset[0]

{'prompt': "Compare customers' response in the two conversations:\nConversation A: Customer: I am not interested in your insurance.\nConversation B: Sale: Hello, how can I help with your insurance needs today?\nIs customer A more insulting than customer B?",
 'completion': 'Yes',
 '__index_level_0__': 1378}

In [12]:
from peft import LoraConfig

# LoRA config based on QLoRA paper & Sebastian Rachka experiment
peft_config = LoraConfig(
    lora_alpha = 128,
    lora_dropout = 0.05,
    r = 256,
    bias = "none",
    target_modules = "all-linear",
    task_type="CASUAL_LM",
)

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir= "Ksgk-fy/zephry-7b-align-sft-test01",
    num_train_epochs=3,
    per_device_train_batch_size=3,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim = "adamw_torch_fused",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=2e-4,
    # bf16=True,
    # tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    push_to_hub=True,
    report_to="tensorboard",
)


from trl import SFTTrainer

# Define a formatting function for the dataset
def formatting_func(examples):
    return {
        'input_ids': tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)['input_ids']
    }

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset.map(formatting_func),
    tokenizer=tokenizer,
    packing=True,
    dataset_text_field='text',  # Assuming 'text' is the field in your dataset containing the text
)


Map:   0%|          | 0/3024 [00:00<?, ? examples/s]

KeyError: 'text'

In [None]:
# BitsAndBytesConfig int-4 config
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from trl import setup_chat_format

model_id = "alignment-handbook/zephyr-7b-sft-full" # or `mistralai/Mistral-7B-v0.1` 

lora_id = "DESUCLUB/zephyr_dpo" # or `mistralai/Mistral-7B-v0.1`
 
bnb_config = BitsAndBytesConfig(
load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)
 
# Load model and tokenizer
config = LoraConfig.from_pretrained(lora_id)
 
 
model = AutoModelForCausalLM.from_pretrained(
config.base_model_name_or_path,
device_map="auto",
quantization_config=bnb_config,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right' # to prevent warnings
 
# # set chat template to OAI chatML, remove if you start from a fine-tuned model
model, tokenizer = setup_chat_format(model, tokenizer)
 
def format_dpo_data(sample):
    return {
    "prompt": sample['prompt'],
    "chosen": sample['messages'][1]['content'],
    "rejected": "No" if sample['messages'][1]['content'] == "Yes" else "Yes"
    }
 
dpo_dataset = dpo_data.map(format_dpo_data, remove_columns=dpo_data.features)



from peft import LoraConfig

# LoRA config based on QLoRA paper & Sebastian Raschka experiment
peft_config = LoraConfig(
    lora_alpha=128,
    lora_dropout=0.05,
    r=256,
    bias="none",
    target_modules="all-linear",
    task_type="CAUSAL_LM",
    )
 
from transformers import TrainingArguments
args = TrainingArguments(
    output_dir="zephyr_dpo_2", # directory to save and repository id
    num_train_epochs=3, # number of training epochs
    per_device_train_batch_size=3, # batch size per device during training
    gradient_accumulation_steps=2, # number of steps before performing a backward/update pass
    gradient_checkpointing=True, # use gradient checkpointing to save memory
    optim="adamw_torch_fused", # use fused adamw optimizer
    logging_steps=10, # log every 10 steps
    save_strategy="epoch", # save checkpoint every epoch
    learning_rate=2e-4, # learning rate, based on QLoRA paper
    bf16=True, # use bfloat16 precision
    tf32=True, # use tf32 precision
    max_grad_norm=0.3, # max gradient norm based on QLoRA paper
    warmup_ratio=0.03, # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant", # use constant learning rate scheduler
    push_to_hub=True, # push model to hub
    report_to="wandb", # report metrics to wandb
    )


dpo_trainer = DPOTrainer(
    model,
    ref_model = None,
    args = args,
    beta = 0.1,
    train_dataset = dpo_dataset,
    tokenizer = tokenizer,
    peft_config=peft_config,

    )

dpo_trainer.train()