Original Source: https://huggingface.co/blog/llama2

In [None]:
# Uncomment to install/upgrade the libraries this notebook imports or reports to.
#!pip install --upgrade peft transformers bitsandbytes accelerate torch datasets trl wandb

# NOTE: `!export VAR=...` runs in a throwaway subshell, so the variable never
# reaches the notebook kernel. Setting it on os.environ affects this process
# directly; this cell runs before the cell that imports torch.
import os

os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

In [None]:
import torch
from accelerate import Accelerator
from datasets import load_dataset
from peft import LoraConfig
from transformers import TrainingArguments, LlamaTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer

In [None]:
# Base checkpoint and fine-tuning corpus, both referenced by repository id.
# (model_path may instead point at a local copy of the checkpoint directory.)
model_path = "meta-llama/Llama-2-7b-chat-hf"
dataset_name = "timdettmers/openassistant-guanaco"

# Dataset column and maximum sequence length handed to the trainer.
dataset_text_field, seq_length = "text", 512

In [None]:
# Load the base model weights through bitsandbytes in 4-bit (8-bit disabled).
# The compute dtype is set explicitly: left at its float32 default, 4-bit layers
# would compute in float32 even though the model is loaded as bfloat16, which
# transformers warns leads to slow training.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    load_in_8bit=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Hyper-parameters and bookkeeping options passed to the trainer.
training_args = TrainingArguments(
    # Optimisation schedule.
    learning_rate=1.41e-5,
    num_train_epochs=3,
    max_steps=-1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    # Logging: one record per step, reported to Weights & Biases.
    logging_steps=1,
    report_to=["wandb"],
    # Checkpointing under ./output.
    output_dir="./output",
    save_steps=100,
    save_total_limit=10,
    # Nothing is pushed to the Hub.
    push_to_hub=False,
    hub_model_id=None,
)

# LoRA adapter settings for causal-LM fine-tuning.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    inference_mode=False,
    # Adapters are attached only to the modules named here.
    target_modules=["q_proj", "v_proj"],
    # Adapter rank / alpha / dropout values.
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Tokenizer: the end-of-sequence token is reused as the padding token.
tokenizer = LlamaTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

# Model: the whole network (the "" key) is mapped to this process's local index.
process_index = Accelerator().local_process_index
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map={"": process_index},
    quantization_config=quantization_config,
)

# Training split of the fine-tuning corpus.
dataset = load_dataset(dataset_name, split="train")

In [None]:
# Assemble the supervised fine-tuning trainer from the pieces built above.
trainer = SFTTrainer(
    # What to train, and with which adapter configuration.
    model=model,
    peft_config=peft_config,
    tokenizer=tokenizer,
    # What to train on.
    train_dataset=dataset,
    dataset_text_field=dataset_text_field,
    max_seq_length=seq_length,
    # How to train.
    args=training_args,
)

In [None]:
# Run fine-tuning, then explicitly persist the final weights. Periodic
# checkpoints are only written every `save_steps`, so (depending on the
# transformers version) the last steps may otherwise never reach disk.
train_result = trainer.train()
trainer.save_model()  # no argument: writes to the trainer's configured output_dir
train_result  # keep the training summary as the cell's displayed value