<a href="https://colab.research.google.com/github/ananthmeka/GenAI-Examples/blob/main/FineTuning-QLoRA-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Validation with Base Model
#Load the Fine-Tuned Model and Tokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the model and tokenizer from the saved directory
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

# Example usage for a question and answer task
def answer_question(question):
    inputs = tokenizer(question, return_tensors="pt")
    outputs = model.generate(inputs.input_ids)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

# Example question
question = "What is a correlated anomaly?"
print(answer_question(question))

a symbiotic relationship


In [10]:
!pip install datasets
!pip install peft
!pip install evaluate
!pip install transformers bitsandbytes accelerate peft
!pip install bitsandbytes

from datasets import load_dataset , DatasetDict, Dataset
from transformers import AutoTokenizer, AutoConfig, AutoModelForSeq2SeqLM, DataCollatorWithPadding, TrainingArguments, Trainer
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np
from transformers import DistilBertModel, DistilBertTokenizer



In [11]:
import pandas as pd

# Load the custom data from a CSV file
data = pd.read_csv("./qa_pairs.csv")

# Check the first few rows of the dataset
print(data.head())

                                            question  \
0  What is the topic of section 'Anomalies' in th...   
1  What are the sub-topics covered under 'Anomali...   
2  What are the different properties that can be ...   
3  How many lines does the text contain about ano...   
4    What are the main topics discussed in the text?   

                                              answer  
0  The topic of section 'Anomalies' in the Cisco ...  
1  The 'Anomalies' section in the Cisco Nexus Das...  
2  Anomaly properties include filters, global rul...  
3     The text contains 2 lines about anomaly rules.  
4  The text discusses Anomaly rules, guidelines a...  


In [12]:
# Convert the DataFrame to a Dataset object
dataset = Dataset.from_pandas(data)
# Example function to preprocess data
def preprocess_data(examples):
    return {
        "input_text": ["Question: " + question for question in examples["question"]],
        "target_text": ["Answer: " + answer for answer in examples["answer"]]
    }

# Apply the preprocessing function to the dataset
dataset = dataset.map(preprocess_data, batched=True, remove_columns=["question", "answer"])

dataset

Map:   0%|          | 0/86 [00:00<?, ? examples/s]

Dataset({
    features: ['input_text', 'target_text'],
    num_rows: 86
})

In [13]:


from transformers import AutoModelForCausalLM, AutoTokenizer
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# QUantization
#Testing

print(f"Torch Version is : {torch.__version__}")
print(bnb.__version__)
# Check if BitsAndBytesConfig is accessible
print(hasattr(BitsAndBytesConfig, 'load_in_8bit'))


model_name = "google/flan-t5-base"

# Define the configuration for quantization
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with the quantization config and set to use the CPU
device_map = {"": "cuda"}
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map=device_map
)

# Now let's check the dtype of all parameters
for name, param in model.named_parameters():
    print(f"Layer: {name}, dtype: {param.dtype}")


Using device: cpu
Torch Version is : 2.5.1+cu124
0.45.3
True


ImportError: Using `bitsandbytes` 8-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [6]:
# Tokenize the dataset
def tokenize_function(examples):
    model_inputs = tokenizer(examples["input_text"], max_length=512, truncation=True)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["target_text"], max_length=512, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    model_inputs["input_ids"] = model_inputs["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["input_text", "target_text"])
tokenized_dataset

Map:   0%|          | 0/86 [00:00<?, ? examples/s]



Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 86
})

In [7]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)

In [8]:
def print_number_of_trainable_model_parameters(model):
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Number of trainable parameters: {trainable_params}")

peft_model = get_peft_model(model,
                            lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

Number of trainable parameters: 3538944
None


In [9]:
# Set the Training Params
from transformers import DataCollatorForSeq2Seq

# we want to ignore tokenizer pad token in the loss
label_pad_token_id = -100
# Data collator
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=peft_model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=8
)


In [12]:
# Data collator to handle padding
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
#data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Set up training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=100,
    predict_with_generate=True,
    remove_unused_columns=False,
    gradient_accumulation_steps=4,
    #no_cuda=True  # Force training on CPU
    report_to="none"

)

# Initialize the Trainer
trainer = Seq2SeqTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Fine-tune the model
trainer.train()

# Check the dtype of the model layers after training with LoRA
for name, param in model.named_parameters():
    print(f"Layer: {name}, dtype: {param.dtype}")



  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss
0,No log,2.654297
1,No log,2.644531
2,No log,2.634766
3,No log,2.625
4,No log,2.613281
5,No log,2.603516
6,No log,2.59375
7,No log,2.582031
8,No log,2.572266
9,No log,2.560547


TrainOutput(global_step=2000, training_loss=2.493558288574219, metrics={'train_runtime': 3144.1955, 'train_samples_per_second': 5.47, 'train_steps_per_second': 0.636, 'total_flos': 590697703194624.0, 'train_loss': 2.493558288574219, 'epoch': 199.93023255813952})

In [1]:
model.save_pretrained("./FineTune/flan-t5-base-QLORA-NDI/")
tokenizer.save_pretrained("./FineTune/flan-t5-base-QLORA-NDI/")

NameError: name 'model' is not defined

In [24]:
#Load the Fine-Tuned Model and Tokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the model and tokenizer from the saved directory


from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


base_model_path = "google/flan-t5-base"  # Path to the base model
fine_tuned_model_path = "./FineTune/flan-t5-base-QLORA-NDI/"  # Path to your quantized model (QLORA)

# Load the base model (e.g., flan-t5-base)
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_path)

# Load the tokenizer for the base model
tokenizer = AutoTokenizer.from_pretrained(base_model_path)


# Now load the QLORA model weights separately (model.safetensors, etc.)
# In case you saved the model as 'safetensors', we need to load it
# But you need to load it manually with torch or using `safetensors` library
from safetensors.torch import load_file

# Assuming your model weights are saved in `model.safetensors`
qlora_weights = load_file(fine_tuned_model_path + "/model.safetensors")


# Transfer weights from QLORA model to the base model
for name, param in base_model.named_parameters():
    # If the parameter exists in the QLORA weights, load it
    if name in qlora_weights:
        qlora_param = qlora_weights[name]

        # Adjust the weight type if needed (e.g., converting quantized weights to float32)
        if qlora_param.dtype != torch.float32:
            qlora_param = qlora_param.float()

        # Now, assign the weights to the base model
        param.data.copy_(qlora_param.data)





# Set the quantization config for loading a quantized model
bnb_config = BitsAndBytesConfig(load_in_8bit=True)  # Specify the quantization method (e.g., 8-bit)

model = AutoModelForSeq2SeqLM.from_pretrained(fine_tuned_model_path,  quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_path)

'''
#model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base") # Load base model first
model = AutoModelForSeq2SeqLM.from_pretrained("./FineTune/flan-t5-base-QLORA-NDI/") # Load model
tokenizer = AutoTokenizer.from_pretrained("./FineTune/flan-t5-base-QLORA-NDI/") # Load the tokenizer
#model = PeftModel.from_pretrained(model, "./FineTune/flan-t5-base-QLORA-NDI/", is_trainable=False, config='./FineTune/flan-t5-base-QLORA-NDI/config.json') # Load LoRA weights onto base model
'''

# Example usage for a question and answer task
def answer_question(question):
    inputs = tokenizer(question, return_tensors="pt")
    outputs = model.generate(inputs.input_ids)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

# Example question
question = "What is a correlated anomaly?"
print(answer_question(question))

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


RuntimeError: "normal_kernel_cpu" not implemented for 'Char'