In [1]:
# Requirements for running this notebook. If your environment is missing any of these dependencies, uncomment the install line below, run it, and restart the kernel.
# ! pip install --user --upgrade "transformers>=4.43.2" "peft>=0.7.1,!=0.11.0" "trl>=0.7.9,<0.9.0" bitsandbytes "accelerate>=0.26.1"

In [None]:
import mlflow
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

In [2]:
# Load the tokenizer, the (optionally quantized) base model, and the SFT dataset.
model_name = "NousResearch/Meta-Llama-3.1-8B"

# Quantization switches: enable at most one. With both off, the model is loaded
# unquantized in `compute_dtype`.
use_4_bit = False
use_8_bit = True

if use_4_bit and use_8_bit:
    raise ValueError("Enable only one of `use_4_bit` and `use_8_bit`.")

# Single dtype used for both the model weights that stay unquantized and the
# 4-bit compute path, so the two never disagree.
compute_dtype = torch.bfloat16  # using an A10G

tokenizer = AutoTokenizer.from_pretrained(model_name)
# The tokenizer has no pad token assigned here; reuse EOS so batches can be padded.
tokenizer.pad_token = tokenizer.eos_token

if use_4_bit:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
    )
elif use_8_bit:
    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
else:
    # Always bind the name so the `from_pretrained` call below never hits a NameError.
    bnb_config = None

# `trust_remote_code` is intentionally left at its default (False): the checkpoint
# is loaded through AutoModelForCausalLM without opting in to repo-supplied code.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=compute_dtype,
)
# Disable the generation KV cache on the model config before training
# (the training cell enables gradient checkpointing).
model.config.use_cache = False

dataset_name = "mlabonne/guanaco-llama2-1k"

dataset = load_dataset(dataset_name, split="train")

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00,  1.97s/it]


In [4]:
# Configure and run LoRA supervised fine-tuning, tracking the run in MLflow.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,  # just for demo, increase when training for real
    per_device_train_batch_size=4,
    logging_dir='./logs',
    logging_steps=10,
    gradient_checkpointing=True,
    # `report_to=None` does not disable reporting: Transformers 4.x treats it as
    # "all installed integrations". Name MLflow explicitly, since the training
    # call below runs inside an MLflow run.
    report_to="mlflow",
)

# LoRA adapter settings; anything not listed here uses the LoraConfig defaults.
lora_config = LoraConfig(
    r=8,
    bias="none",
    task_type="CAUSAL_LM",
)

# Passing `peft_config` makes SFTTrainer attach the LoRA adapter to `model`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=lora_config,
    train_dataset=dataset,
    dataset_text_field="text",
)

print("Fine-tuning model:")
experiment_name = 'llama3-1-8b-8bit-lora-ft'
exp = mlflow.set_experiment(experiment_name)
# Train inside an explicit MLflow run so the run is closed when training ends or fails.
with mlflow.start_run() as run:
    trainer.train()



Fine-tuning model:


2024/07/31 01:50:28 INFO mlflow.tracking.fluent: Experiment with name 'llama3-1-8b-8bit-lora-ft' does not exist. Creating a new experiment.


Step,Training Loss
10,1.4407
20,1.3766
30,1.3628
40,1.357
50,1.4362
60,1.3032
70,1.3087
80,1.3759
90,1.5657
100,1.236


In [5]:
# Persist the fine-tuned model with `save_pretrained`.
# Target: a Domino dataset for the app (use the artifacts path instead for the API).
# model_save_location = "/mnt/artifacts/lora/"
model_save_location = '/mnt/data/llama3_1_sft/'

# If the trainer's model is wrapped (exposes `.module`), save the inner model.
model_to_save = getattr(trainer.model, 'module', trainer.model)
model_to_save.save_pretrained(model_save_location)

In [None]:
# Reload the fine-tuned LoRA adapter from disk onto the base model.
# NOTE: `get_peft_model` would only create a *new, untrained* adapter from the config;
# `PeftModel.from_pretrained` also loads the saved adapter weights.
# NOTE(review): intended for a base `model` freshly loaded by the model-loading cell;
# in the same kernel right after training, `trainer.model` already holds the adapter.
lora_config = LoraConfig.from_pretrained(model_save_location)
model = PeftModel.from_pretrained(model, model_save_location)

In [5]:
# Generate a completion for a sample prompt with the current `model`.
text = "Who is Mark Zukerberg?"
# Follow the model's actual placement instead of hard-coding a GPU index.
device = model.device

inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(
    **inputs,
    max_new_tokens=750,
    # Pass the pad token explicitly; otherwise generate() falls back to EOS
    # and emits a warning on every call.
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Who is Mark Zukerberg? Mark Zuckerberg is an American computer programmer and Internet entrepreneur. He is the chairman, chief executive officer, and co-founder of the social networking website Facebook. Zuckerberg was born in White Plains, New York, and was raised in Dobbs Ferry, New York. He attended Phillips Exeter Academy for high school. He majored in computer science at Harvard University, where he began Facebook as a sophomore. Facebook was originally called The Facebook. Zuckerberg is known for wearing the same gray T-shirt every day. He is a vegetarian and has pledged to give away at least 99% of his Facebook stock to charity. He is the 14th richest person in the world with a net worth of $17.5 billion.
Zuckerberg is known for wearing the same gray T-shirt every day.
He is a vegetarian and has pledged to give away at least 99% of his Facebook stock to charity.
He is the 14th richest person in the world with a net worth of $17.5 billion.
