<a href="https://colab.research.google.com/github/afifaniks/triagerX/blob/main/notebook/Fine_Tune_Llama_2_with_LoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/244.2 kB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m70.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━

In [None]:
import gc
import os

import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging
)
from trl import SFTTrainer

## Configuration

In [None]:
# Base checkpoint and dataset identifiers, plus the name used for the fine-tuned
# output (local save directory and Hub repository name).
model_name = "NousResearch/Llama-2-7b-chat-hf"
dataset_name = "mlabonne/guanaco-llama2-1k"
new_model = "llama-2-7b-guanaco-qlora"

# Set QLoRA configuration (these values are forwarded to LoraConfig)
lora_r = 64 # Attention dimension/rank
lora_alpha = 16 # Passed as LoraConfig(lora_alpha=...)
lora_dropout = 0.05 # Passed as LoraConfig(lora_dropout=...)

# Set bitsandbytes configuration (these values are forwarded to BitsAndBytesConfig)
use_4bit = True # Load the base model in 4-bit precision
bnb_4bit_compute_dtype = "float16" # Compute dtype for 4-bit base models (name of a torch dtype attribute)
bnb_4bit_quant_type = "nf4" # Quantization type (fp4 or nf4)
use_nested_quant = False # Activate nested quantization for 4-bit base models (double quantization)


# Set training params
output_dir = "./results"
num_train_epochs = 1
# Mixed-precision training flags; both are switched off here.
fp16 = False
bf16 = False
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 1
gradient_checkpointing = True
max_grad_norm = 0.3
learning_rate = 2e-4
weight_decay = 0.001
optim = "paged_adamw_32bit"
lr_scheduler_type = "cosine"
# NOTE(review): the TrainingArguments docs state that a positive max_steps
# overrides num_train_epochs, so this run would stop after 10 optimizer steps
# regardless of the epoch count above -- confirm the short run is intended.
max_steps = 10
warmup_ratio = 0.03
group_by_length = True # Group sequences into batches with same length saves memory and speeds up training considerably
save_steps = 0
logging_steps = 10

# Set SFT parameters (forwarded to SFTTrainer, except device_map which is used when loading models)
max_seq_length = None
packing = False # Pack multiple short examples in the same input sequence to increase efficiency
device_map = {"": 0} # Load the entire model on the GPU 0

In [None]:
# Turn the dtype name from the configuration cell (e.g. "float16") into the
# torch dtype attribute of the same name.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Collect the quantization options first, then build the config in one call.
quantization_settings = {
    "load_in_4bit": use_4bit,
    "bnb_4bit_quant_type": bnb_4bit_quant_type,
    "bnb_4bit_compute_dtype": compute_dtype,
    "bnb_4bit_use_double_quant": use_nested_quant,
}
bnb_config = BitsAndBytesConfig(**quantization_settings)

## Load Base Model

In [None]:
# Load the base checkpoint with the bitsandbytes quantization config (used for
# optimized model loading) and the device placement from the configuration cell.
base_load_options = {
    "quantization_config": bnb_config,
    "device_map": device_map,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **base_load_options)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
# Pad on the right (fix for the overflow issue with fp16 training) and reuse
# the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

## Setup Training Pipeline

In [None]:
# Only the "train" split of the configured dataset is loaded.
dataset = load_dataset(path=dataset_name, split="train")

In [None]:
# Only relevant for the 4-bit + float16 setup: report whether the GPU's major
# compute capability (>= 8) means bfloat16 could be used instead.
if use_4bit and compute_dtype == torch.float16:
    capability_major = torch.cuda.get_device_capability()[0]
    if capability_major < 8:
        print(f"Using {compute_dtype}")
    else:
        separator = "=" * 80
        print(separator)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print(separator)

Using torch.float16


In [None]:
# Training-time model settings.
# NOTE(review): use_cache is presumably disabled because the generation cache is
# not useful during training and conflicts with gradient checkpointing -- confirm.
model.config.use_cache = False
model.config.pretraining_tp = 1

# LoRA adapter definition; rank, alpha and dropout come from the configuration cell.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)

# Every training value declared in the configuration cell is forwarded here.
# `per_device_eval_batch_size` and `gradient_checkpointing` were previously
# declared but never passed, so editing them in the config had no effect.
training_params = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
)

# Supervised fine-tuning on the dataset's "text" column; the trainer receives
# the LoRA config and applies it to `model`.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_params,
    packing=packing,
)

# Train, then save the trainer's model under the `new_model` directory.
trainer.train()
trainer.model.save_pretrained(new_model)

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.3893


## Test Base Model

In [None]:
def inference(model, tokenizer, prompt, max_length=200):
    """Generate text for ``prompt`` wrapped in the ``[INST] ... [/INST]`` template.

    A fresh text-generation pipeline is built on every call from the given
    model and tokenizer.

    Args:
        model: Model object handed to the pipeline.
        tokenizer: Tokenizer object handed to the pipeline.
        prompt: Instruction text inserted between the ``[INST]`` markers.
        max_length: Forwarded to the pipeline as ``max_length``.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    generator = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=max_length,
    )
    wrapped_prompt = f"<s>[INST] {prompt} [/INST]"
    outputs = generator(wrapped_prompt)
    first_output = outputs[0]
    return first_output["generated_text"]

In [None]:
# NOTE(review): this runs after trainer.train() on the same `model` object that
# was handed to SFTTrainer together with a peft_config. If the trainer attached
# the LoRA layers to that object in place, this output is not from the
# untouched base model -- confirm.
base_wheels_answer = inference(model, tokenizer, "Why birds don't have wheels? Answer as briefly as possible")
print(base_wheels_answer)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


<s>[INST] Why birds don't have wheels? Answer as briefly as possible [/INST]  Birds don't have wheels because they are aerial creatures that fly, not designed for ground transportation. nobody needs wheels when you can fly! 😊


In [None]:
# Code-generation prompt; the answer is cut off by inference()'s default max_length.
base_delphi_answer = inference(model, tokenizer, "Can you write some Delphi code that uses named pipes?")
print(base_delphi_answer)

<s>[INST] Can you write some Delphi code that uses named pipes? [/INST]  Certainly! Here is an example of how to use named pipes in Delphi:
 Unterscheidung between input and output pipes:
```
procedure TForm1.Button1Click(Sender: TObject);
begin
  // Create an input pipe
  Pipe := CreateFile('in.txt', GENERIC_READ, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0


In [None]:
# Short factual-choice prompt.
base_fish_answer = inference(model, tokenizer, "Which is a species of fish? Tope or Rope")
print(base_fish_answer)

<s>[INST] Which is a species of fish? Tope or Rope [/INST]  Both "tope" and "rope" are not species of fish. nobody.

"Tope" is a term used in fishing to refer to a large, powerful fish, typically a shark or a large predatory fish.

"Rope" is not a species of fish, but rather a type of cord or line made from twisted fibers.

If you have any other questions, feel free to ask!


## Memory Cleanup to Save Fine-Tuned Model

In [None]:
# Drop the training-time objects so the memory they hold can be reclaimed
# before the base model is reloaded in half precision for merging.
# globals().pop keeps the cell re-runnable: a second run is a no-op instead of
# raising NameError the way a bare `del` would.
for _stale_name in ("model", "trainer"):
    globals().pop(_stale_name, None)

# Collect unreachable objects first, then release PyTorch's cached CUDA blocks;
# gc.collect() alone leaves freed GPU memory reserved by the caching allocator.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

## Merge LoRA Weights with Base Model

In [None]:
# Reload the base checkpoint in float16 (no quantization config this time),
# attach the adapter saved under `new_model`, and merge it into the base model.
fp16_load_options = dict(
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
base_model = AutoModelForCausalLM.from_pretrained(model_name, **fp16_load_options)
model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

# Fresh tokenizer with the same padding setup used for training.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
!huggingface-cli login

model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

## Test Fine-Tuned Model

In [None]:
# Release the merged model before it is reloaded for evaluation.
# `tokenizer` is deliberately kept: the inference calls in the cells below still
# pass it and nothing reloads it, so deleting it here raised NameError on a
# fresh Restart & Run All.
# Each name is removed independently, so one missing name no longer skips the
# rest (and no bare `except:` is needed to hide that).
for _stale_name in ("model", "base_model"):
    globals().pop(_stale_name, None)

# Collect unreachable objects, then release PyTorch's cached CUDA blocks so the
# reload below starts from a clean allocator.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
# Hub repository holding the fine-tuned weights.
hf_custom_model_path = f"afifaniks/{new_model}"

# Load the base checkpoint in float16 on the configured device.
test_load_options = {
    "low_cpu_mem_usage": True,
    "return_dict": True,
    "torch_dtype": torch.float16,
    "device_map": device_map,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **test_load_options)

In [None]:
# Attach the weights published under `hf_custom_model_path` to the freshly
# loaded base model and merge them in a single expression.
model = PeftModel.from_pretrained(model, hf_custom_model_path).merge_and_unload()

In [None]:
# Same prompt as in the earlier test section, now against the reloaded and merged model.
tuned_wheels_answer = inference(model, tokenizer, "Why birds don't have wheels? Answer as briefly as possible")
print(tuned_wheels_answer)

<s>[INST] Why birds don't have wheels? Answer as briefly as possible [/INST]  Birds don't have wheels because they are adapted for flight, which requires lightweight, streamlined bodies and powerful wings for lift and maneuverability. Additionally, wheels would hinder their ability to fly and navigate through their environment.


In [None]:
# Code-generation prompt repeated against the reloaded and merged model.
tuned_delphi_answer = inference(model, tokenizer, "Can you write some Delphi code that uses named pipes?")
print(tuned_delphi_answer)

<s>[INST] Can you write some Delphi code that uses named pipes? [/INST]  Certainly! Here is an example of how to use named pipes in Delphi:

First, you will need to create a named pipe. You can do this using the `CreateNamedPipe` function, which takes the following parameters:

* `hPipe`: a handle to the named pipe
* `dwOpenMode`: a value that specifies how the pipe should be opened (e.g. `PIPE_ACCESS_DUPLEX` for a duplex pipe)
* `dwWriteMode`: a value that specifies how the pipe should be written to (e.g. `PIPE_TYPE_MESSAGE` for a message-based pipe)
* `nMaxInstances`: the maximum number of instances that can be open on the pipe
* `nOutBufferSize`: the size


In [None]:
# Factual-choice prompt repeated against the reloaded and merged model.
tuned_fish_answer = inference(model, tokenizer, "Which is a species of fish? Tope or Rope")
print(tuned_fish_answer)

<s>[INST] Which is a species of fish? Tope or Rope [/INST]  I apologize, but "Tope" and "Rope" are not species of fish. They are actually two different types of fishing gear.

* Tope is a type of fishing line used for deep-sea fishing, particularly for large game fish such as tuna and marlin.
* Rope is a type of fishing line used for bottom fishing, particularly for species such as cod and halibut.

So, neither "Tope" nor "Rope" is a species of fish. I hope this clears up any confusion!
