# Supervised Fine Tuning of llama

In [1]:
import torch
import os
from datetime import datetime

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer

from peft import LoraConfig

from datasets import load_dataset

print(f'Is bf16 supported: {torch.cuda.is_bf16_supported()}')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device: {device}')

Is bf16 supported: True
Using device: cuda


# Model ID
* **NOTE:** You need to have access to these models
* Visit huggingface page and request access
* Set your token via setting the environment variable HF_TOKEN

In [2]:
model_id = 'meta-llama/Llama-3.2-1B-Instruct'
#model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
#model_id = 'meta-llama/Meta-Llama-3-8B'

# Quantization Settings

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, # 4 bit quantization
    bnb_4bit_quant_type='nf4', # normalized float 4 bit
    bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
)

# Tokenizer

In [4]:
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=os.environ['HF_TOKEN'] # required for some models
)

### Special Tokens

In [5]:
tokenizer.special_tokens_map

{'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>'}

### Terminators List

In [6]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids('<|eot_id|>')
]

# Model 

In [7]:
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    quantization_config=bnb_config, 
    device_map={'':0}, 
    token=os.environ['HF_TOKEN'] # required for some models
)

### Open Ended Generation

In [8]:
tokenizer.pad_token = tokenizer.eos_token
model.generation_config.pad_token_id = tokenizer.pad_token_id

# LoRA

In [9]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=8,
    lora_dropout=0.1,
    target_modules=['q_proj', 'o_proj', 'k_proj', 'v_proj', 'gate_proj', 'up_proj', 'down_proj'],
    task_type='CAUSAL_LM',
)

# Fine Tuning Dataset

In [10]:
data = load_dataset('flytech/python-codes-25k')
data

DatasetDict({
    train: Dataset({
        features: ['output', 'instruction', 'input', 'text'],
        num_rows: 49626
    })
})

In [11]:
data['train']['output'][0]

"```python\ntasks = []\nwhile True:\n    task = input('Enter a task or type 'done' to finish: ')\n    if task == 'done': break\n    tasks.append(task)\nprint(f'Your to-do list for today: {tasks}')\n```"

In [12]:
data['train']['instruction'][0]

'Help me set up my daily to-do list!'

In [13]:
data['train']['input'][0]

'Setting up your daily to-do list...'

In [14]:
data['train']['text'][0]

"Help me set up my daily to-do list! Setting up your daily to-do list... ```python\ntasks = []\nwhile True:\n    task = input('Enter a task or type 'done' to finish: ')\n    if task == 'done': break\n    tasks.append(task)\nprint(f'Your to-do list for today: {tasks}')\n```"

## Utility Function for Preprocessing

* llama 3 special tokens: https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
* llama 3.1 special tokens: https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1

In [15]:
def formatting_func(example):
    
    instruction = example['instruction']
    input_assistant = example['input']
    output = example['output']

    # THANK YOU FOR CHOOSING US is added to test the fine tuning
    
    text = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a helpful assistant.
    <|eot_id|>
    
    <|start_header_id|>user<|end_header_id|>
    {instruction} <|eot_id|>
    
    <|start_header_id|>assistant<|end_header_id|>
    THANK YOU FOR CHOOSING US
    THANK YOU FOR CHOOSING US
    
    {input_assistant}
    {output} <|eot_id|>"""
    
    #return [text]
    return {'sample': text}

In [16]:
data = data.map(
    formatting_func,
    num_proc=4,
)

data

DatasetDict({
    train: Dataset({
        features: ['output', 'instruction', 'input', 'text', 'sample'],
        num_rows: 49626
    })
})

In [17]:
data['train']['sample'][0]

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n    You are a helpful assistant.\n    <|eot_id|>\n    \n    <|start_header_id|>user<|end_header_id|>\n    Help me set up my daily to-do list! <|eot_id|>\n    \n    <|start_header_id|>assistant<|end_header_id|>\n    THANK YOU FOR CHOOSING US\n    THANK YOU FOR CHOOSING US\n    \n    Setting up your daily to-do list...\n    ```python\ntasks = []\nwhile True:\n    task = input('Enter a task or type 'done' to finish: ')\n    if task == 'done': break\n    tasks.append(task)\nprint(f'Your to-do list for today: {tasks}')\n``` <|eot_id|>"

### Calculate Statistics of Token Length

In [18]:
import pandas as pd

len_list = []

for sample in data['train']['sample']:
    tokens = tokenizer.encode(sample)
    len_list.append(len(tokens))

pd.DataFrame(len_list).describe()

Unnamed: 0,0
count,49626.0
mean,165.883448
std,74.98331
min,60.0
25%,114.0
50%,144.0
75%,193.0
max,506.0


# Training

In [19]:
sft_arguments = SFTConfig(
    output_dir='training_output',
    overwrite_output_dir=False,
    #num_train_epochs=1, # NOTE: max_steps will override this
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    warmup_ratio=0.03,
    max_steps=50, # -1 for epoch based training
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=1e-4, 
    max_grad_norm=0.75, # gradient clipping
    lr_scheduler_type='constant',
    #max_seq_length=200, # Reduce VRAM (Meta-Llama-3-8B-*)
    max_seq_length=1024 * 4,
    group_by_length=True,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),        
    optim='paged_adamw_8bit',
    report_to='tensorboard',
    dataset_text_field='sample',
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=data['train'],
    args=sft_arguments,
    peft_config=lora_config,
    #formatting_func=formatting_func,
)

max_steps is given, it will override any value given in num_train_epochs


## Start Training

In [20]:
trainer.train()

Step,Training Loss
25,1.3056
50,1.0109


TrainOutput(global_step=50, training_loss=1.1582257080078124, metrics={'train_runtime': 27.0853, 'train_samples_per_second': 7.384, 'train_steps_per_second': 1.846, 'total_flos': 194098384539648.0, 'train_loss': 1.1582257080078124, 'epoch': 0.004030145488252126})

# Save

In [21]:
os.makedirs('saved_models', exist_ok=True)
#trainer.save_model('saved_models/llama_3_1_python')
trainer.save_model('saved_models/llama_3_2_1B_python') # Llama-3.2-1B-Instruct

# Generate Text with Fine Tuned Model
* https://huggingface.co/docs/transformers/main_classes/text_generation

In [22]:
@torch.amp.autocast('cuda')
def generate(prompt, cut=True):

    text = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>    
    You are a helpful assistant.
    <|eot_id|>
    
    <|start_header_id|>user<|end_header_id|>
    {prompt} <|eot_id|>
    
    <|start_header_id|>assistant<|end_header_id|>"""
    
    inputs = tokenizer(text, return_tensors='pt').to(device)
    input_token_len = len(inputs['input_ids'][0])
    
    #print(inputs)
    #print('Input token len:', input_token_len)
    
    outputs = model.generate(
        **inputs, # input token ids and attention mask (as kwargs)
        max_new_tokens=64,
        #eos_token_id=terminators,
        do_sample=True,
        temperature=0.75,
        #top_p=0.9,
        num_beams=2,
        num_return_sequences=1,
        return_dict_in_generate=True,
        output_scores=False,
    )

    #print(f'LEN: {len(outputs)}')
    #print('KEYS', outputs.keys())
    #print(f'Scores: {outputs["sequences_scores"]}')
    
    for _gen_output in outputs['sequences']:
        #_generated_response = tokenizer.decode(_gen_output, skip_special_tokens=False)
        _generated_response = tokenizer.decode(_gen_output[input_token_len:], skip_special_tokens=False)
    
        #if cut:
        #    _generated_response = _generated_response.split('<|eot_id|>')[0]
            
        print(_generated_response)
        print('*'*65)
        print('#'*65)
        print()

In [23]:
prompt = 'how to convert json to dataframe.'
generate(prompt)

    THANK YOU FOR CHOOSING US
    THANK YOU FOR CHOOSING US
    
    
    ```python
import json
import pandas as pd

json_data = json.loads('{"key": "value"}')
df = pd.json_normalize(json_data)
print(df)
```  ```python
``` <|start_header_id|>assistant<|end_header_id|>
*****************************************************************
#################################################################



In [24]:
prompt = 'Write a for loop for dummy list'
generate(prompt)

    THANK YOU FOR CHOOSING US
    THANK YOU FOR CHOOSING US
    
    
    ```python
for i in range(len(dummy_list)):
    print(dummy_list[i])
```  ```python
``` <|start_header_id|>assistant<|end_header_id|>    THANK YOU FOR CHOOSING US
    THANK YOU FOR CHOOSING US
    
    

*****************************************************************
#################################################################



In [25]:
prompt = 'Türkçe bir hikaye yaz'
generate(prompt, cut=False)

    THANK YOU FOR CHOOSING US
    THANK YOU FOR CHOOSING US
    
    
    Writing a story in Turkish...
    ```python
story = "Hikaye"
print(story)
```  ```python
story = "Hikaye"
print(story)
```  ```python
story = "
*****************************************************************
#################################################################

