<a href="https://colab.research.google.com/github/hissain/ml/blob/main/codes/fine_tuning/Fine_Tuning_with_SFTTrainer_LoRA_PEFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture log
!pip install datasets
!pip install trl
!pip install peft
!pip install accelerate
!pip install transformers
!pip install wandb

In [2]:
%%capture log
!pip install -U "huggingface_hub[cli]"

In [3]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read)

In [4]:
#import os
#os.environ["WANDB_MODE"] = "disabled"

In [None]:
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForSeq2Seq

# Load dataset and subset
dataset = load_dataset("stanfordnlp/imdb", split="train")
subset = dataset.select(range(5000))

# Load base model and tokenizer
base_model = "facebook/opt-350m"
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(base_model)

# Configure LoRA
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Tokenize dataset
def preprocess_function(examples):
    outputs = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    outputs["labels"] = outputs["input_ids"].copy()
    return outputs

tokenized_dataset = subset.map(preprocess_function, batched=True, remove_columns=subset.column_names)

# Define data collator for padding
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Training arguments
training_args = TrainingArguments(
    output_dir="./lora_fine_tuned",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=50,
    save_strategy="epoch",
    evaluation_strategy="no",
    learning_rate=2e-4,
    fp16=True,
    dataloader_num_workers=2,
    report_to="none",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train
trainer.train()

# Save the fine-tuned model
trainer.model.save_pretrained("./lora_fine_tuned")
tokenizer.save_pretrained("./lora_fine_tuned")


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

  trainer = Trainer(
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
  self.pid = os.fork()


Step,Training Loss
50,2.5674
100,1.8139
150,1.8104
200,1.802
250,1.8189
300,1.765
350,1.7004
400,1.8195
450,1.7662
500,1.8299


  self.pid = os.fork()
  self.pid = os.fork()


('./lora_fine_tuned/tokenizer_config.json',
 './lora_fine_tuned/special_tokens_map.json',
 './lora_fine_tuned/vocab.json',
 './lora_fine_tuned/merges.txt',
 './lora_fine_tuned/added_tokens.json',
 './lora_fine_tuned/tokenizer.json')

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_path = "./lora_fine_tuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

input_text = "The movie was absolutely"
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

output = model.generate(input_ids, max_length=50, num_return_sequences=1)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("Generated Text:", generated_text)


Generated Text: The movie was absolutely terrible. The plot was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was terrible, the acting was


### Complete Code for Fine-Tuning a LLaMA Model

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from transformers import DataCollatorForSeq2Seq

# Load dataset and subset
dataset = load_dataset("stanfordnlp/imdb", split="train")
subset = dataset.select(range(500))

# Load LLaMA small model and tokenizer
base_model = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=False)

# Assign a padding token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype="auto",
    low_cpu_mem_usage=True,
)

# Resize model embeddings
model.resize_token_embeddings(len(tokenizer))

# Configure LoRA
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Tokenize dataset
def preprocess_function(examples):
    outputs = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    outputs["labels"] = outputs["input_ids"].copy()
    return outputs

tokenized_dataset = subset.map(preprocess_function, batched=True, remove_columns=subset.column_names)

# Define data collator
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Training arguments
training_args = TrainingArguments(
    output_dir="./llama_lora_fine_tuned",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="no",
    learning_rate=1e-4,
    fp16=False,
    report_to="none",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

# Save fine-tuned model
trainer.model.save_pretrained("./llama_lora_fine_tuned")
tokenizer.save_pretrained("./llama_lora_fine_tuned")


In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model

# Load dataset and subset
dataset = load_dataset("stanfordnlp/imdb", split="train")
subset = dataset.select(range(500))

# Load GPT-2 model and tokenizer (smaller version)
base_model = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Assign a padding token if none exists
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token})

# Load model
model = AutoModelForCausalLM.from_pretrained(base_model, low_cpu_mem_usage=True)

# Resize model embeddings
model.resize_token_embeddings(len(tokenizer))

# Configure LoRA
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Tokenize dataset
def preprocess_function(examples):
    outputs = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    outputs["labels"] = outputs["input_ids"].copy()
    return outputs

tokenized_dataset = subset.map(preprocess_function, batched=True, remove_columns=subset.column_names)

# Define data collator
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Training arguments
training_args = TrainingArguments(
    output_dir="./gpt2_lora_fine_tuned",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="no",
    learning_rate=1e-4,
    fp16=False,  # Mixed precision if needed
    report_to="none",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train
trainer.train()

# Save fine-tuned model
trainer.model.save_pretrained("./gpt2_lora_fine_tuned")
tokenizer.save_pretrained("./gpt2_lora_fine_tuned")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



Map:   0%|          | 0/500 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
10,4.187
20,4.2014
30,4.1824
40,4.133
50,4.3573
60,4.2104


('./gpt2_lora_fine_tuned/tokenizer_config.json',
 './gpt2_lora_fine_tuned/special_tokens_map.json',
 './gpt2_lora_fine_tuned/vocab.json',
 './gpt2_lora_fine_tuned/merges.txt',
 './gpt2_lora_fine_tuned/added_tokens.json',
 './gpt2_lora_fine_tuned/tokenizer.json')

In [2]:
model_path = "./gpt2_lora_fine_tuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

input_text = "The movie was absolutely"
input_ids = tokenizer.encode(input_text, return_tensors="pt")

output = model.generate(input_ids, max_length=50, num_return_sequences=1)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("Generated Text:", generated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Text: The movie was absolutely amazing. I was so excited to see it. I was so excited to see the movie. I was so excited to see the movie. I was so excited to see the movie. I was so excited to see the movie.
