<a href="https://colab.research.google.com/github/hissain/ml/blob/main/codes/fine_tuning/Fine_Tuning_with_SFTTrainer_LoRA_PEFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
%%capture log
!pip install datasets
!pip install trl
!pip install peft
!pip install accelerate
!pip install transformers
!pip install wandb

In [6]:
%%capture log
!pip install -U "huggingface_hub[cli]"

In [8]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
The token `Read` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate w

In [4]:
import os
os.environ["WANDB_MODE"] = "disabled"

In [None]:
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForSeq2Seq

# Load dataset and subset
dataset = load_dataset("stanfordnlp/imdb", split="train")
subset = dataset.select(range(5000))

# Load base model and tokenizer
base_model = "facebook/opt-350m"
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(base_model)

# Configure LoRA
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Tokenize dataset
def preprocess_function(examples):
    outputs = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    outputs["labels"] = outputs["input_ids"].copy()
    return outputs

tokenized_dataset = subset.map(preprocess_function, batched=True, remove_columns=subset.column_names)

# Define data collator for padding
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Training arguments
training_args = TrainingArguments(
    output_dir="./lora_fine_tuned",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=50,
    save_strategy="epoch",
    evaluation_strategy="no",
    learning_rate=2e-4,
    fp16=True,
    dataloader_num_workers=2,
    report_to="none",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train
trainer.train()

# Save the fine-tuned model
trainer.model.save_pretrained("./lora_fine_tuned")
tokenizer.save_pretrained("./lora_fine_tuned")


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

  trainer = Trainer(
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
  self.pid = os.fork()


Step,Training Loss
50,2.5674
100,1.8139
150,1.8104
200,1.802
250,1.8189
300,1.765
350,1.7004
400,1.8195
450,1.7662
500,1.8299


  self.pid = os.fork()
  self.pid = os.fork()


('./lora_fine_tuned/tokenizer_config.json',
 './lora_fine_tuned/special_tokens_map.json',
 './lora_fine_tuned/vocab.json',
 './lora_fine_tuned/merges.txt',
 './lora_fine_tuned/added_tokens.json',
 './lora_fine_tuned/tokenizer.json')

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_path = "./lora_fine_tuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

input_text = "The movie was absolutely"
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

output = model.generate(input_ids, max_length=50, num_return_sequences=1)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("Generated Text:", generated_text)


Generated Text: The movie was absolutely terrible. The plot was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was


### Complete Code for Fine-Tuning a LLaMA Model

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import DataCollatorForSeq2Seq

# Load dataset and subset
dataset = load_dataset("stanfordnlp/imdb", split="train")
subset = dataset.select(range(5000))

# Load LLaMA small model and tokenizer
#base_model = "meta-llama/Llama-2-7b-hf"
base_model = "meta-llama/Meta-Llama-3-8B"

tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(base_model)

# Prepare model for LoRA (optional step for quantization-aware training)
model = prepare_model_for_kbit_training(model)

# Configure LoRA
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Tokenize dataset
def preprocess_function(examples):
    outputs = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    outputs["labels"] = outputs["input_ids"].copy()
    return outputs

tokenized_dataset = subset.map(preprocess_function, batched=True, remove_columns=subset.column_names)

# Define data collator for padding
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Training arguments with memory optimization
training_args = TrainingArguments(
    output_dir="./llama_lora_fine_tuned",
    per_device_train_batch_size=4,  # Smaller batch size
    gradient_accumulation_steps=2,  # Accumulate gradients
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=50,
    save_strategy="epoch",
    evaluation_strategy="no",
    learning_rate=2e-4,
    fp16=True,  # Enable mixed precision
    dataloader_num_workers=2,
    dataloader_pin_memory=True,  # Optimize data transfer
    report_to="none",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train
trainer.train()

# Save the fine-tuned model
trainer.model.save_pretrained("./llama_lora_fine_tuned")
tokenizer.save_pretrained("./llama_lora_fine_tuned")


tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
model_path = "./llama_lora_fine_tuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

input_text = "The movie was absolutely"
input_ids = tokenizer.encode(input_text, return_tensors="pt")

output = model.generate(input_ids, max_length=50, num_return_sequences=1)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("Generated Text:", generated_text)


OSError: Incorrect path_or_model_id: './llama_lora_fine_tuned'. Please provide either the path to a local folder or the repo_id of a model on the Hub.