<a href="https://colab.research.google.com/github/doraemonidol/NLP/blob/main/Fine_Tune_Bloom_model_with_LoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Path to the JSON file with the fine-tuning records. It is passed to
# load_dataset("json", data_files=DATA_PATH) further down, and each record is
# read through its "instruction", "input" and "output" fields when prompts are built.
DATA_PATH = "train.json"

In [2]:
# Install required packages using pip
!pip install -q bitsandbytes datasets accelerate loralib peft trl transformers==4.45.2 sentence-transformers==3.1.1

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.7/365.7 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:


# Import necessary libraries
import torch  # PyTorch library for deep learning
import torch.nn as nn  # Neural network module from PyTorch
import bitsandbytes as bnb  # Library for efficient CUDA operations
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM  # Importing classes from Hugging Face Transformers
import transformers

# Single checkpoint identifier shared by the model and its tokenizer, so the
# vocabulary and special tokens always come from the same Hub repository as
# the weights.
MODEL_NAME = "bigscience/bloom-560m"

# Load the pre-trained BLOOM causal language model.
model_bloom = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # keep full-precision weights
    device_map='auto',  # let accelerate place the model on the available device(s)
)

# Load the tokenizer that ships with the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)



In [4]:
"""Setting up LoRA (Low-Rank Adaptation) using parameter efficient fine tuning."""

from peft import LoraConfig, get_peft_model  # Importing LoRA configuration and model function from PEFT library

# LoRA hyper-parameters used for this fine-tuning run.
conf = LoraConfig(
    task_type="CAUSAL_LM",  # the base model is a causal language model
    target_modules=["query_key_value"],  # adapters are attached to modules with this name
    r=10,  # rank of the low-rank update matrices
    lora_alpha=10,  # LoRA scaling factor
    lora_dropout=0.05,  # dropout applied inside the adapter path
    bias="none",  # bias terms are left untouched
)

# Wrap the base model with the adapters described by `conf`. The wrapped model
# gets its own name so later cells make clear which object is being trained.
peft_bloom = get_peft_model(model_bloom, conf)

In [5]:
"""Comparing parameters before and after applying LoRA to assess the training scope."""

# Number of scalar weights across every parameter tensor of the wrapped model.
total_params = sum(weights.numel() for weights in peft_bloom.parameters())

# Same count, restricted to tensors that will receive gradients.
train_parameters = sum(
    weights.numel() for weights in peft_bloom.parameters() if weights.requires_grad
)

# Report absolute counts and the trainable share.
print(f"Trainable params: {train_parameters}")
print(f"All params: {total_params}")
print(f"Trainable: {100 * train_parameters / total_params:.2f}%")

Trainable params: 983040
All params: 560197632
Trainable: 0.18%


In [6]:
from datasets import load_dataset

# Read the JSON records at DATA_PATH; they are exposed under the "train" key.
data = load_dataset("json", data_files=DATA_PATH)

# Carve a held-out subset off the loaded records. The fixed seed keeps the
# partition identical across runs (the recorded run produced 24408 training
# and 8137 validation examples).
train_val = data["train"].train_test_split(seed=42, shuffle=True)
train_data, val_data = train_val["train"], train_val["test"]

In [7]:

# Maximum sequence length kept per example; per the original author's note,
# 256 accounts for about 96% of the data.
CUTOFF_LEN = 256


def tokenize(prompt):
    """Tokenize `prompt` into fixed-length model inputs.

    The text is truncated/padded to ``CUTOFF_LEN + 1`` positions and the final
    position is then dropped, so both returned lists have ``CUTOFF_LEN`` entries.

    Args:
        prompt: Text to encode with the module-level `tokenizer`.

    Returns:
        dict with "input_ids" and "attention_mask".
    """
    encoded = tokenizer(
        prompt,
        padding="max_length",
        max_length=CUTOFF_LEN + 1,
        truncation=True,
    )
    # Trim the last position of each field to get back to CUTOFF_LEN entries.
    return {field: encoded[field][:-1] for field in ("input_ids", "attention_mask")}


def generate_and_tokenize_prompt(data_point):
    """Render one record as an instruction prompt and tokenize it for training.

    The labels mask the prompt tokens and the padding with -100 so that a
    label-preserving collator computes the loss only on the response.

    The prompt length is measured on the *unpadded* prompt. Measuring it on a
    sequence padded to the maximum length always yields CUTOFF_LEN and masks
    every label. Padding is appended on the right by hand, so the result does
    not depend on the tokenizer's configured padding side, and the attention
    mask is 0 on padded positions.

    Note: a collator that rebuilds labels from `input_ids` (for example
    `DataCollatorForLanguageModeling(mlm=False)`) discards the prompt masking
    produced here.

    Args:
        data_point: Mapping with "instruction", "input" (may be empty) and
            "output" text fields.

    Returns:
        dict with "input_ids", "labels" and "attention_mask", each a list of
        exactly CUTOFF_LEN integers.
    """
    if data_point["input"]:
        user_prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Input:
{data_point["input"]}

### Response:
"""
    else:
        user_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{data_point["instruction"]}

### Response:
"""

    # Tokenize without padding so the lengths count real tokens only.
    prompt_ids = tokenizer(
        user_prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
    )["input_ids"]
    full_ids = tokenizer(
        user_prompt + data_point["output"],
        truncation=True,
        max_length=CUTOFF_LEN,
    )["input_ids"]

    # Guard against the prompt alone filling (or exceeding) the truncated sequence.
    prompt_len = min(len(prompt_ids), len(full_ids))
    pad_len = CUTOFF_LEN - len(full_ids)

    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        # Tokenizers without a dedicated pad token: reuse EOS; these positions
        # are hidden by the attention mask and the -100 labels anyway.
        pad_id = tokenizer.eos_token_id

    return {
        "input_ids": full_ids + [pad_id] * pad_len,
        "labels": [-100] * prompt_len + full_ids[prompt_len:] + [-100] * pad_len,
        "attention_mask": [1] * len(full_ids) + [0] * pad_len,
    }


# Shuffle with a fixed seed (the same one used for the train/validation split)
# so that a fresh "Restart & Run All" yields the same example order, then turn
# every record into model inputs.
train_data = train_data.shuffle(seed=42).map(generate_and_tokenize_prompt)
val_data = val_data.shuffle(seed=42).map(generate_and_tokenize_prompt)

Map:   0%|          | 0/24408 [00:00<?, ? examples/s]

Map:   0%|          | 0/8137 [00:00<?, ? examples/s]

In [8]:
# Build the Trainer for the LoRA-wrapped model and start fine-tuning.
trainer = transformers.Trainer(
    model=peft_bloom,
    train_dataset=train_data,
    # NOTE(review): no evaluation schedule is configured in the arguments below,
    # so presumably this set is only used by an explicit trainer.evaluate() call.
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step per device
        warmup_steps=100,
        max_steps=150,  # takes precedence over num_train_epochs (see the recorded warning)
        learning_rate=1e-3,
        # Mixed precision is only enabled when a CUDA device is present, so the
        # cell also runs (in full precision) on CPU-only machines instead of raising.
        fp16=torch.cuda.is_available(),
        logging_steps=1,
        output_dir='outputs',
        report_to="none"
    ),
    # NOTE(review): with mlm=False this collator rebuilds `labels` from
    # `input_ids` (masking only pad tokens), so a `labels` column in the dataset
    # is not what the loss is computed on. Switch to a label-preserving collator
    # (e.g. transformers.default_data_collator) only after verifying that the
    # dataset's labels contain unmasked response tokens.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
peft_bloom.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
1,7.1843


KeyboardInterrupt: 