# Checking the imports

In [1]:
import torch

print("Is a ROCm-GPU detected? ", torch.cuda.is_available())
print("How many ROCm-GPUs are detected? ", torch.cuda.device_count())

Is a ROCm-GPU detected?  True
How many ROCm-GPUs are detected?  1




In [2]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig
from trl import SFTTrainer



# Login to HuggingFace

In [3]:
# Base model and tokenizer names.
base_model_name = "tiiuae/falcon-7b"

# Load base model to GPU memory.
device = "cuda:0"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name, trust_remote_code=True,
     torch_dtype=torch.float16
).to(device)

# Load tokenizer.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Dataset for finetuning

In [4]:
# Dataset for fine-tuning.
training_dataset_name = "mlabonne/guanaco-llama2-1k"
training_dataset = load_dataset(training_dataset_name, split = "train")

# Check the data.
print(training_dataset)

# Dataset 11 is a QA sample in English.
print(training_dataset[11])

Dataset({
    features: ['text'],
    num_rows: 1000
})
{'text': '<s>[INST] write me a 1000 words essay about deez nuts. [/INST] The Deez Nuts meme first gained popularity in 2015 on the social media platform Vine. The video featured a young man named Rodney Bullard, who recorded himself asking people if they had heard of a particular rapper. When they responded that they had not, he would respond with the phrase "Deez Nuts" and film their reactions. The video quickly went viral, and the phrase became a popular meme. \n\nSince then, Deez Nuts has been used in a variety of contexts to interrupt conversations, derail discussions, or simply add humor to a situation. It has been used in internet memes, in popular music, and even in politics. In the 2016 US presidential election, a 15-year-old boy named Brady Olson registered as an independent candidate under the name Deez Nuts. He gained some traction in the polls and even made appearances on national news programs.\n\nThe Deez Nuts meme h

# Fine-tuning params

In [14]:
# Training parameters for SFTTrainer.
training_arguments = TrainingArguments(
    output_dir = "./results",
         num_train_epochs = 1,
         per_device_train_batch_size = 4,
         gradient_accumulation_steps = 1,
         optim = "paged_adamw_32bit",
         save_steps = 50,
         logging_steps = 50,
         learning_rate = 4e-5,
         weight_decay = 0.001,
         fp16=False,
         bf16=False,
         max_grad_norm = 0.3,
         max_steps = -1,
         warmup_ratio = 0.03,
         group_by_length = True,
         lr_scheduler_type = "constant",
         report_to = "tensorboard",         
)

# Fine-tuning

In [15]:
peft_config = LoraConfig(
        lora_alpha = 16,
        lora_dropout = 0.1,
        r = 64,
        bias = "none",
        task_type = "CAUSAL_LM"
)
# View the number of trainable parameters.
from peft import get_peft_model
peft_model = get_peft_model(base_model, peft_config)
peft_model.print_trainable_parameters()

trainable params: 18,874,368 || all params: 6,940,595,072 || trainable%: 0.2719




In [16]:
# Initialize an SFT trainer.
sft_trainer = SFTTrainer(
        model = base_model,
        train_dataset = training_dataset,
        peft_config = peft_config,
        # dataset_text_field = "text",
        # tokenizer = tokenizer,
        args = training_arguments
)

# Run the trainer.
sft_trainer.train()



Converting train dataset to ChatML:   0%|          | 0/1000 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (7356 > 2048). Running this sequence through the model will result in indexing errors


Truncating train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
HSA exception: MemoryRegion::BlockAllocator::alloc failed.
HSA exception: MemoryRegion::BlockAllocator::alloc failed.


OutOfMemoryError: HIP out of memory. Tried to allocate 1.11 GiB. GPU 0 has a total capacity of 23.98 GiB of which 1017.20 MiB is free. Of the allocated memory 20.78 GiB is allocated by PyTorch, and 1.11 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_HIP_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# PEFT adapter name.
adapter_name = "llama-2-7b-enhanced-adapter"

# Save PEFT adapter.
sft_trainer.model.save_pretrained(adapter_name)