In [1]:
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer

In [2]:
import pandas as pd

# model_name = "NousResearch/Llama-2-7b-chat-hf"

loaded_df = pd.read_pickle('../data/social_explorer/5_dataset_expanded.pkl')
dataset = Dataset.from_pandas(loaded_df, split='train')

OUTPUT_FOLDER = "./results-v3"

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    quantization_config=bnb_config
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

training_arguments = TrainingArguments(
    output_dir=OUTPUT_FOLDER,
    num_train_epochs=1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    bf16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="Combined",
    max_seq_length=128,
    tokenizer=tokenizer,
    args=training_arguments,
)

# Train model
trainer.train()

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.

Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/109761 [00:00<?, ? examples/s]

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.


Step,Training Loss
500,1.1036
1000,0.4738
1500,0.4616
2000,0.458
2500,0.4525
3000,0.4477
3500,0.447
4000,0.4437
4500,0.4458
5000,0.4416


TrainOutput(global_step=13721, training_loss=0.45786679464373153, metrics={'train_runtime': 5878.8338, 'train_samples_per_second': 18.671, 'train_steps_per_second': 2.334, 'total_flos': 2.356200455959511e+17, 'train_loss': 0.45786679464373153, 'epoch': 1.0})

In [4]:
trainer.model.save_pretrained(OUTPUT_FOLDER)
tokenizer.save_pretrained(OUTPUT_FOLDER)

('./results-v3/tokenizer_config.json',
 './results-v3/special_tokens_map.json',
 './results-v3/tokenizer.model',
 './results-v3/added_tokens.json',
 './results-v3/tokenizer.json')