### QLORA Supervised Finetuning process

#### - references:
- 

In [2]:
import os,sys
sys.path.insert(0,'../')
sys.path.insert(0,'../../../libs')
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

## project specific modules
import config
import utils

### 1. Load and explore datasets
- in this example we will just use Dolly from Databricks. 
- for domain specific fintuning, you should use your own data or some kind of mixing

In [3]:
dataset = load_dataset("databricks/databricks-dolly-15k", split="train",cache_dir=config.cache_dir)
print(dataset[0])
print(f"dataset size: {len(dataset)}")

{'instruction': 'When did Virgin Australia start operating?', 'context': "Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to use the Virgin brand. It commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route. It suddenly found itself as a major airline in Australia's domestic market after the collapse of Ansett Australia in September 2001. The airline has since grown to directly serve 32 cities in Australia, from hubs in Brisbane, Melbourne and Sydney.", 'response': 'Virgin Australia commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route.', 'category': 'closed_qa'}
dataset size: 15011


- Since we are using llama2 chat model, we should follow llama2 prompt template for our training data
- if we are using other pretrianing model, we should use the corespond training prompt template 

In [4]:
def create_prompt_formats(sample):
    """
    Format various fields of the sample ('instruction', 'context', 'response')
    transform them into llama chat style template 
    """
    system_message = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    prompt = '{} ###Input:{}'.format(sample['instruction'],sample['context'])
    response  = sample['response']
    sample['text'] = f'[INST] <<SYS>>\n{system_message.strip()}\n<</SYS>>\n\n' + prompt + ' [/INST] ' + response
    return sample

print(create_prompt_formats(dataset[0])['text'])

[INST] <<SYS>>
Below is an instruction that describes a task. Write a response that appropriately completes the request.
<</SYS>>

When did Virgin Australia start operating? ###Input:Virgin Australia, the trading name of Virgin Australia Airlines Pty Ltd, is an Australian-based airline. It is the largest airline by fleet size to use the Virgin brand. It commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route. It suddenly found itself as a major airline in Australia's domestic market after the collapse of Ansett Australia in September 2001. The airline has since grown to directly serve 32 cities in Australia, from hubs in Brisbane, Melbourne and Sydney. [/INST] Virgin Australia commenced services on 31 August 2000 as Virgin Blue, with two aircraft on a single route.


In [5]:
## format and split data 
split_dataset = dataset.train_test_split(test_size=0.1, shuffle=True, seed=123)
split_dataset = split_dataset.map( create_prompt_formats) # ,remove_columns=['instruction','context','response']); use the sft trainer, remove is not necessary
train_dataset, test_dataset = split_dataset['train'],split_dataset['test']
print(train_dataset[0])

{'instruction': 'Classify each of these car manufacturers as either French, German, American, Japanese, or other: Lexus, Hyundai, Alfa Romeo, VW, Honda, GM, Citroen', 'context': '', 'response': 'French: Citroen\nGerman: VW\nAmerican: GM\nJapanese: Lexus, Honda\nother: Hyundai, Alfa Romeo', 'category': 'classification', 'text': '[INST] <<SYS>>\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n<</SYS>>\n\nClassify each of these car manufacturers as either French, German, American, Japanese, or other: Lexus, Hyundai, Alfa Romeo, VW, Honda, GM, Citroen ###Input: [/INST] French: Citroen\nGerman: VW\nAmerican: GM\nJapanese: Lexus, Honda\nother: Hyundai, Alfa Romeo'}


### 2. Load model and set bnb config 

In [6]:
def create_bnb_config():
    # bitsandbytes parameters
    bnb_config = BitsAndBytesConfig(
        load_in_4bit= True, # Activate 4-bit precision base model loading
        bnb_4bit_quant_type="nf4", # Quantization type (fp4 or nf4)
        bnb_4bit_compute_dtype= "float16", # Compute dtype for 4-bit base models
        bnb_4bit_use_double_quant= False, # Activate nested quantization for 4-bit base models (double quantization)
    )
    compute_dtype = getattr(torch, 'float16') # Load tokenizer and model with QLoRA configuration

    # Check GPU compatibility with bfloat16
    if compute_dtype == torch.float16 and bnb_config.load_in_4bit:
        major, _ = torch.cuda.get_device_capability()
        if major >= 8:
            print("=" * 80)
            print("Your GPU supports bfloat16: accelerate training with bf16=True")
            print("=" * 80)
    
    return bnb_config

def load_model(model_name, bnb_config):
    n_gpus = torch.cuda.device_count()
    #max_memory = f'{40960}MB'

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto", # dispatch efficiently the model on the available ressources
        cache_dir= config.cache_dir
        #max_memory = {i: max_memory for i in range(n_gpus)},
    )
    model.config.use_cache = False    ## not exactly sure why need to set this one
    model.config.pretraining_tp = 1   ## not exactly sure why need to set this one
    
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,cache_dir= config.cache_dir)
    
    # Needed for LLaMA tokenizer
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    return model, tokenizer

### 3. Load LoRA configuration and set training arguments

In [7]:

peft_config = LoraConfig(
        r=16,  # dimension of the updated matrices
        lora_alpha=64,  # parameter for scaling
        #target_modules=modules,   ## targeted lora modules; if want to specify see https://blog.ovhcloud.com/fine-tuning-llama-2-models-using-a-single-gpu-qlora-and-ai-notebooks/
        lora_dropout=0.1,  # dropout probability for layers
        bias="none",
        task_type="CAUSAL_LM",
    )

training_arguments = TrainingArguments(
    output_dir=os.path.join(config.data_folder3,"results","llama-2-7b-dolly-custom"),
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=500,
    logging_steps=50,
    learning_rate=1e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1, # Number of training steps (overrides num_train_epochs)
    warmup_ratio=0.03,
    group_by_length=True, # Group sequences into batches with same length, Saves memory and speeds up training considerably
    lr_scheduler_type="constant",
    #report_to="tensorboard"
)

In [8]:
# The model that you want to train from the Hugging Face hub
model_name = "NousResearch/Llama-2-7b-chat-hf"
bnb_config = create_bnb_config()
model,tokenizer = load_model(model_name, bnb_config)

Your GPU supports bfloat16: accelerate training with bf16=True


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=None,  # If none is passed, the trainer will retrieve that value from the tokenizer. 
                          # Some tokenizers do not provide default value, so there is a check to retrieve the minimum between 2048 and that value. 
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,        # already used groupping 
)



In [None]:
trainer.train()
trainer.model.save_pretrained(os.path.join(config.data_folder3,'results_final','llama-2-7b-dolly-custom'))

Step,Training Loss
100,1.224
200,1.1233
300,1.1054
400,1.1242
500,1.1015
600,1.0911
700,1.1041
800,1.0981
