<a href="https://colab.research.google.com/github/gupta24789/llms-fine-tuning/blob/main/gemma/fine_tune_gemma_using_qlora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Objective

- Fine-tune the **google/gemma-2b** model using QLoRA.
- Dataset : **b-mc2/sql-create-context**

In [None]:
# !pip3 install -q -U bitsandbytes==0.42.0
# !pip3 install -q -U peft==0.8.2
# !pip3 install -q -U trl==0.7.10
# !pip3 install -q -U accelerate==0.27.1
# !pip3 install -q -U datasets==2.17.0
# !pip3 install -q -U transformers==4.38.0

In [None]:
import os

## Pin the notebook to physical GPU 1 unless the caller already selected a
## device: setdefault keeps an externally provided CUDA_VISIBLE_DEVICES
## instead of silently overriding it. Must run before CUDA is initialised.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', "1")

In [None]:
## Standard library
from pprint import pprint

## Third-party
import torch
from datasets import load_dataset
from dotenv import load_dotenv
from peft import (
    AutoPeftModelForCausalLM,
    LoraConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging,
    pipeline,
)
from trl import SFTTrainer

## Load variables from a local .env file into the process environment
## (HF_READ_TOKEN is read from os.environ when the model is loaded).
load_dotenv()

True

## Load Dataset

In [None]:
## Download the text-to-SQL dataset; only the "train" split is requested.
dataset = load_dataset(
    "b-mc2/sql-create-context",
    split="train",
)
dataset

Dataset({
    features: ['context', 'answer', 'question'],
    num_rows: 78577
})

In [None]:
## Create Train & Test Split (10% held out for evaluation, fixed seed).
## NOTE: `dataset` is rebound here from a single Dataset to a DatasetDict with
## 'train' and 'test' keys, so this cell is not idempotent -- re-run the load
## cell above before running it a second time.
dataset = dataset.train_test_split(test_size = 0.1, seed=42)
dataset

DatasetDict({
    train: Dataset({
        features: ['context', 'answer', 'question'],
        num_rows: 70719
    })
    test: Dataset({
        features: ['context', 'answer', 'question'],
        num_rows: 7858
    })
})

In [None]:
## Peek at the first training example (context, answer and question fields).
dataset['train'][0]

{'context': 'CREATE TABLE table_name_75 (insurgents VARCHAR, civilians VARCHAR)',
 'answer': 'SELECT insurgents FROM table_name_75 WHERE civilians = "49"',
 'question': 'Name the insurgents for civilians being 49'}

## Prepare Data

Data Format:

        Question : <question>
        Context : <context>
        Answer : <answer>

In [None]:
def transform_data(row):
    """Render one dataset row as a single training string.

    Args:
        row: mapping with 'question', 'context' and 'answer' entries.

    Returns:
        A dict with one key, "text", holding three newline-separated lines
        labelled "Question : ", "Context : " and "Answer : " (note the space
        before each colon).
    """
    parts = (
        f"Question : {row['question']}",
        f"Context : {row['context']}",
        f"Answer : {row['answer']}",
    )
    return {"text": "\n".join(parts)}

In [None]:
## Apply transform_data to every row of both splits; this adds a `text`
## column next to the original context/answer/question columns.
transformed_dataset = dataset.map(transform_data)
transformed_dataset

DatasetDict({
    train: Dataset({
        features: ['context', 'answer', 'question', 'text'],
        num_rows: 70719
    })
    test: Dataset({
        features: ['context', 'answer', 'question', 'text'],
        num_rows: 7858
    })
})

In [None]:
## Show the formatted training text for the first training row.
print(transformed_dataset['train'][0]['text'])

Question : Name the insurgents for civilians being 49
Context : CREATE TABLE table_name_75 (insurgents VARCHAR, civilians VARCHAR)
Answer : SELECT insurgents FROM table_name_75 WHERE civilians = "49"


## Load Model & Tokenizer

In [None]:
## 4-bit NF4 quantization with double quantization; matmuls are computed in fp16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
)

model_name = "google/gemma-2b"

## tokenizer
## Pass the same access token as the model load below so that both downloads
## are authenticated consistently.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=os.environ['HF_READ_TOKEN'])
print(f"Vocab size : {tokenizer.vocab_size}")
print(f"PAD TOKEN : {tokenizer.pad_token}")

## model
## device_map {"": 0} places the whole model on the first *visible* GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0},
    token=os.environ['HF_READ_TOKEN'],
)
## The KV cache is switched off for training; it is re-enabled before inference.
model.config.use_cache = False

Vocab size : 256000
PAD TOKEN : <pad>


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Inference Before Training

In [None]:
## Prompt for a quick sanity check of the base model before fine-tuning.
## NOTE(review): the training text uses "Question : " / "Context : " (space
## before the colon) while this prompt uses "Question:" / "Context:";
## consider aligning the two formats.
text = """Question: What is the average number of working horses of farms with greater than 45 total number of horses?
Context: CREATE TABLE farm (Working_Horses INTEGER, Total_Horses INTEGER)"""

## Reference SQL, kept for manual comparison with the generated text.
answer = "SELECT AVG(Working_Horses) FROM farm WHERE Total_Horses > 45"
## Separator line of 99 dashes (same value the original join expression built).
dash_line = '-' * 99

## Send the inputs to the device the model actually lives on rather than
## guessing from torch.cuda.is_available().
device = model.device
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=20)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is the average number of working horses of farms with greater than 45 total number of horses?
Context: CREATE TABLE farm (Working_Horses INTEGER, Total_Horses INTEGER)
INSERT INTO farm VALUES (10, 100);
INSERT INTO farm VALUES (


## Training Setup

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, tensor)`` pairs, e.g. a ``torch.nn.Module``.

    Prints one line with the trainable count, the total count and the
    trainable percentage. A model without any parameters reports 0.0%
    instead of raising ZeroDivisionError.

    NOTE(review): counts come from ``param.numel()``; for 4-bit quantized
    layers this presumably reflects the packed storage size rather than the
    logical weight count -- confirm before quoting the totals.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the percentage against an empty (parameter-less) model.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
## Enable gradient checkpointing, then let PEFT prepare the quantized model
## for k-bit training, and print the resulting module tree.
## NOTE(review): prepare_model_for_kbit_training is believed to enable
## gradient checkpointing itself by default, which would make the explicit
## call redundant -- confirm against the installed peft version.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
print(model)

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear4bit(in_features=16384, out_features=2048, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
 

In [None]:
## LoRA config: rank-32 adapters (alpha 64, dropout 0.1, no bias terms) on
## the attention and MLP projection layers shown in the model printout.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        "q_proj", "o_proj", "k_proj", "v_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

## Wrap the prepared base model with the adapters and report how much of it
## is trainable.
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

trainable params: 39223296 || all params: 1554491392 || trainable%: 2.5232237503441897


## Training Arguments

In [None]:
CHECKPOINTS_DIR = "checkpoints"

training_arguments = TrainingArguments(
    output_dir=CHECKPOINTS_DIR,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step per device
    num_train_epochs=1,  # overridden by max_steps below while max_steps > 0
    learning_rate=1e-4,
    optim="paged_adamw_32bit",
    logging_steps=10,

    evaluation_strategy="steps",
    save_strategy="steps",
    # NOTE(review): every evaluation pass covers the whole test split, and it
    # runs every 10 steps -- this likely dominates the runtime; consider a
    # larger eval_steps or a smaller evaluation subset.
    eval_steps=10,
    # Without an explicit save_steps the default interval (500) is larger than
    # max_steps, so no checkpoint would ever be written to CHECKPOINTS_DIR.
    save_steps=50,
    max_steps=200,

    fp16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.05,
    group_by_length=True,
    report_to="tensorboard",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
)
model.config.use_cache = False  # silence the warnings. re-enable for inference!

## SFT Trainer
## NOTE(review): `model` is already wrapped by get_peft_model, so passing
## peft_config here looks redundant -- confirm how the installed trl version
## treats an already-wrapped PeftModel.
trainer = SFTTrainer(
    model=model,
    train_dataset=transformed_dataset['train'],
    eval_dataset=transformed_dataset['test'],
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/70719 [00:00<?, ? examples/s]

Map:   0%|          | 0/7858 [00:00<?, ? examples/s]

Detected kernel version 4.15.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


## Train Model

In [None]:
## Start fine-tuning. The recorded run stopped at global_step=200, matching
## the max_steps value in the training arguments.
trainer.train()

Step,Training Loss,Validation Loss
10,1.2074,1.1013
20,1.122,1.0455
30,1.036,1.018756
40,0.9311,1.020099
50,0.8509,1.057535
60,1.0969,1.004143
70,1.0805,1.000473
80,0.9425,0.976737
90,0.8785,0.973977
100,0.8272,0.999553


TrainOutput(global_step=200, training_loss=0.9700161218643188, metrics={'train_runtime': 6855.1614, 'train_samples_per_second': 0.467, 'train_steps_per_second': 0.029, 'total_flos': 2784617530146816.0, 'train_loss': 0.9700161218643188, 'epoch': 0.05})

## Save Trained Model

In [None]:
# from huggingface_hub import notebook_login
# notebook_login()

In [None]:
## If this cell fails with an access error, uncomment and run the login cell above.
## Only the adapter is saved here (together with the tokenizer files), not the full model.
peft_model_path = "finetuned-adapters"
tokenizer.save_pretrained(peft_model_path)
trainer.model.save_pretrained(peft_model_path)

## Inference

In [None]:
# Ignore warnings: raise the transformers logging threshold to CRITICAL so
# that warnings emitted during generation are not shown.
logging.set_verbosity(logging.CRITICAL)

In [None]:
## Re-enable the cache that was switched off for training, then put the
## model in evaluation mode before generating.
model.config.use_cache = True
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GemmaForCausalLM(
      (model): GemmaModel(
        (embed_tokens): Embedding(256000, 2048, padding_idx=0)
        (layers): ModuleList(
          (0-17): 18 x GemmaDecoderLayer(
            (self_attn): GemmaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_laye

In [None]:
## Same sanity-check prompt as before training, now answered by the fine-tuned model.
## NOTE(review): the training text uses "Question : " / "Context : " (space
## before the colon) while this prompt uses "Question:" / "Context:";
## consider aligning the two formats.
text = """Question: What is the average number of working horses of farms with greater than 45 total number of horses?
Context: CREATE TABLE farm (Working_Horses INTEGER, Total_Horses INTEGER)"""

## Reference SQL, kept for manual comparison with the generated text.
answer = "SELECT AVG(Working_Horses) FROM farm WHERE Total_Horses > 45"
## Separator line of 99 dashes (same value the original join expression built).
dash_line = '-' * 99

## Send the inputs to the device the model actually lives on rather than
## guessing from torch.cuda.is_available().
device = model.device
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=20)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is the average number of working horses of farms with greater than 45 total number of horses?
Context: CREATE TABLE farm (Working_Horses INTEGER, Total_Horses INTEGER)
Answer: SELECT AVG(Working_Horses) FROM farm WHERE Total_Horses > 45


In [None]:
## Drop the notebook's references to the fine-tuned model and tokenizer and
## run the garbage collector before reloading the base model in fp16.
## NOTE(review): `trainer` was constructed with this model and is used again
## further down, so it still references the model; the memory is presumably
## not released until `trainer` is dropped as well -- verify.
del model
del tokenizer
import gc
gc.collect()
gc.collect()

0

## Save Full Model

In [None]:
# Reload model in FP16 and merge it with LoRA weights
# The access token is passed here as well, consistent with the first load of
# the same base model earlier in the notebook.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
    token=os.environ['HF_READ_TOKEN'],
)
# Attach the saved adapter, then fold its weights into the base model so the
# result is a plain (non-PEFT) model.
model = PeftModel.from_pretrained(base_model, peft_model_path)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(peft_model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
## Save Full Model
## Only the merged model and its tokenizer belong in this directory. The
## earlier `trainer.model.save_pretrained(...)` call is dropped because it
## writes the adapter (already stored under `peft_model_path`) next to the
## merged weights, leaving a directory that mixes both formats.
complete_model_path = "finetuned-models"
model.save_pretrained(complete_model_path)
## The tokenizer was reloaded for exactly this purpose but was never saved.
tokenizer.save_pretrained(complete_model_path)

In [None]:
## Same sanity-check prompt, now answered by the merged fp16 model.
## NOTE(review): the training text uses "Question : " / "Context : " (space
## before the colon) while this prompt uses "Question:" / "Context:";
## consider aligning the two formats.
text = """Question: What is the average number of working horses of farms with greater than 45 total number of horses?
Context: CREATE TABLE farm (Working_Horses INTEGER, Total_Horses INTEGER)"""

## Reference SQL, kept for manual comparison with the generated text.
answer = "SELECT AVG(Working_Horses) FROM farm WHERE Total_Horses > 45"
## Separator line of 99 dashes (same value the original join expression built).
dash_line = '-' * 99

## Send the inputs to the device the model actually lives on rather than
## guessing from torch.cuda.is_available().
device = model.device
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=20)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What is the average number of working horses of farms with greater than 45 total number of horses?
Context: CREATE TABLE farm (Working_Horses INTEGER, Total_Horses INTEGER)
Answer: SELECT AVG(Working_Horses) FROM farm WHERE Total_Horses > 45
