# Instruction Finetuning

In this notebook, we investigate instruction fine-tuning of a large language model on the *UNSW-NB15* dataset using various preprocessing techniques.

# Key-Value pairs Text Encoding

In [1]:
import os
from os import getenv

from datasets import load_dataset, Dataset
from dotenv import load_dotenv

# NOTE: a bare `Dataset.cleanup_cache_files` expression used to sit here. It
# only looked the method up without calling it, so it never cleaned anything.
# `cleanup_cache_files` is an instance method: if stale cache files must be
# purged, call `dataset.cleanup_cache_files()` once `dataset` is loaded below.

# Read the Hugging Face credentials from the local .env file / environment.
load_dotenv()
HUGGING_FACE_READ_TOKEN = getenv("HUGGING_FACE_READ_TOKEN")

# Load the whole training split eagerly (no streaming).
dataset = load_dataset("Jetlime/NF-UNSW-NB15-v2", streaming=False, split="train")

  from .autonotebook import tqdm as notebook_tqdm
Using the latest cached version of the dataset since Jetlime/NF-UNSW-NB15-v2 couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /home/paul/.cache/huggingface/datasets/Jetlime___nf-unsw-nb15-v2/default/0.0.0/e787691e196b078564cfc32297f511298a45a15f (last modified on Wed May 22 12:32:54 2024).


In [2]:
import gc

import torch
import mlflow
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

# MLflow tracking server (loopback address) and the experiment that the
# fine-tuning runs of this notebook are recorded under.
MLFLOW_TRACKING_URI = "http://127.0.0.1:8080"
MLFLOW_EXPERIMENT_NAME = "Testing finetuning"

mlflow.set_tracking_uri(uri=MLFLOW_TRACKING_URI)
mlflow.set_experiment(experiment_name=MLFLOW_EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/967304590420382862', creation_time=1715755222120, experiment_id='967304590420382862', last_update_time=1715755222120, lifecycle_stage='active', name='Testing finetuning', tags={}>

In [3]:
# Pick the attention kernel and tensor dtype from the GPU's major compute
# capability: 8 or higher gets flash-attention 2 with bfloat16, anything
# older falls back to eager attention with float16.
gpu_major_capability = torch.cuda.get_device_capability()[0]
has_capability_8_plus = gpu_major_capability >= 8

attn_implementation = "flash_attention_2" if has_capability_8_plus else "eager"
torch_dtype = torch.bfloat16 if has_capability_8_plus else torch.float16

In [4]:
# Checkpoint to fine-tune and the name the resulting adapter/model is saved under.
base_model = "meta-llama/Meta-Llama-3-8B-Instruct"
new_model = "OrpoLlama-3-8B"

# QLoRA: load the base weights in 4-bit NF4 with double quantization; the
# compute dtype follows the dtype selected for the available GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
)

# LoRA: rank-16 adapters on the listed projection layers.
lora_target_modules = [
    'up_proj',
    'down_proj',
    'gate_proj',
    'k_proj',
    'q_proj',
    'v_proj',
    'o_proj',
]
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# The same read token authenticates both Hub downloads.
hub_auth = {"token": HUGGING_FACE_READ_TOKEN}

tokenizer = AutoTokenizer.from_pretrained(base_model, **hub_auth)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    attn_implementation=attn_implementation,
    **hub_auth,
)

# Order matters: install the chat format first, then prepare the quantized
# model for k-bit training.
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.15s/it]


In [5]:
# First fine-tuning trial: hold out 99.98% of the rows and keep the remainder
# as the training subset, stratified on the "Attack" column.
# Note that `dataset` is rebound to the resulting train/test split here.
holdout_fraction = 0.9998
dataset = dataset.train_test_split(
    test_size=holdout_fraction,
    seed=123,
    stratify_by_column="Attack",
)
dataset_finetuning_training = dataset["train"]
dataset_finetuning_training

Dataset({
    features: ['input', 'output', 'Attack'],
    num_rows: 454
})

In [6]:
def format_chat_template(row):
    """Attach the ORPO preference fields to a single dataset row.

    The row is modified in place and returned with three extra keys:

    * ``prompt``   - copy of ``row["input"]``.
    * ``chosen``   - the label ``row["output"]`` rendered as a string.
    * ``rejected`` - the opposite binary label: ``'0'`` when the label is
      ``'1'``, otherwise ``'1'``.

    Args:
        row: Mapping with at least the keys ``"input"`` and ``"output"``.

    Returns:
        The same ``row`` object, augmented with the keys described above.
    """
    label = str(row["output"])
    row['prompt'] = row["input"]
    row["chosen"] = label
    # Compare the normalised string rather than the raw value: with an integer
    # label 1 the raw comparison against '1' is False, which would make
    # `rejected` identical to `chosen` and yield a meaningless preference pair.
    row["rejected"] = '0' if label == '1' else '1'
    return row

# Add the prompt/chosen/rejected columns to every row, using as many worker
# processes as os.cpu_count() reports.
worker_count = os.cpu_count()
dataset_finetuning_training = dataset_finetuning_training.map(
    format_chat_template,
    num_proc=worker_count,
)

dataset_finetuning_training

Dataset({
    features: ['input', 'output', 'Attack', 'prompt', 'chosen', 'rejected'],
    num_rows: 454
})

In [8]:
# ORPO training configuration, grouped by concern.
orpo_args = ORPOConfig(
    # Where results go and where metrics are reported.
    output_dir="results",
    report_to="mlflow",
    logging_steps=1,
    do_eval=False,
    # Optimisation.
    learning_rate=8e-6,
    lr_scheduler_type="linear",
    optim="paged_adamw_8bit",
    beta=0.1,
    num_train_epochs=1,
    # Batching: 2 samples per device, gradients accumulated over 4 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Sequence length limits.
    max_length=1024,
    max_prompt_length=512,
)

trainer = ORPOTrainer(
    model=model,
    tokenizer=tokenizer,
    args=orpo_args,
    peft_config=peft_config,
    train_dataset=dataset_finetuning_training,
)

# Train, then save the result under the `new_model` name.
trainer.train()
trainer.save_model(new_model)

The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.float16.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
1,1.9307
2,1.9653
3,1.8985
4,1.8052
5,1.7487
6,1.8426
7,1.7
8,1.7197
9,1.672
10,1.7362




In [11]:
# Flush memory.
# The references to the trainer and the 4-bit training model must be dropped
# first; otherwise gc.collect() / empty_cache() have nothing to release and the
# quantized model stays resident while the fp16 copy is loaded below.
# Rebinding to None (rather than `del`) keeps this cell safe to re-run.
trainer = None
model = None
gc.collect()
torch.cuda.empty_cache()

# Reload tokenizer and model in fp16. The read token is passed explicitly, as
# in the initial load of the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model, token=HUGGING_FACE_READ_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
    token=HUGGING_FACE_READ_TOKEN,
)
# Apply the same chat format that was set up before training.
model, tokenizer = setup_chat_format(model, tokenizer)

# Merge adapter with base model
model = PeftModel.from_pretrained(model, new_model)
model = model.merge_and_unload()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.16it/s]


ConnectionError: (ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')), '(Request ID: 549b3783-1cab-47bb-9031-1b2bab3d8b9e)')

In [13]:
# Upload the merged model and its tokenizer to the Hub repository named by
# `new_model`, authenticating with the write token from the environment.
HUGGING_FACE_WRITE_TOKEN = getenv("HUGGING_FACE_WRITE_TOKEN")
push_options = {"use_temp_dir": False, "token": HUGGING_FACE_WRITE_TOKEN}

model.push_to_hub(new_model, **push_options)
tokenizer.push_to_hub(new_model, **push_options)

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]
[A

[A[A


[A[A[A

model-00002-of-00004.safetensors:   0%|          | 16.4k/5.00G [00:00<18:24:08, 75.5kB/s]
[A
model-00002-of-00004.safetensors:   0%|          | 5.11M/5.00G [00:00<07:55, 10.5MB/s]   
model-00002-of-00004.safetensors:   0%|          | 7.19M/5.00G [00:01<16:20, 5.09MB/s]
model-00002-of-00004.safetensors:   0%|          | 9.32M/5.00G [00:01<13:54, 5.98MB/s]
model-00002-of-00004.safetensors:   0%|          | 12.1M/5.00G [00:01<08:58, 9.26MB/s]
model-00002-of-00004.safetensors:   0%|          | 15.5M/5.00G [00:01<06:05, 13.6MB/s]

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A
model-00002-of-00004.safetensors:   0%|          | 17.3M/5.00G [00:02<12:35, 6.59MB/s]
model-00002-of-00004.safetensors:   1%|          | 35.5M/5.00G [00:03<05:06, 16.2MB/s]

[A[A

model-00002-of-00004.safetensors:   1%|          | 38.8M/5.00G [00:03<05:17, 15.6MB/s]

[A[A
model-00002-of-00004.safetensors:   1%|

CommitInfo(commit_url='https://huggingface.co/Jetlime/OrpoLlama-3-8B/commit/530926bb0d43ef820f8902a1d865ee649490a317', commit_message='Upload tokenizer', commit_description='', oid='530926bb0d43ef820f8902a1d865ee649490a317', pr_url=None, pr_revision=None, pr_num=None)