## Fine Tuning Gemma 2 2B Instruct using Math Orca 200k Dataset

In [2]:
!pip install -U torch
!pip install -U datasets
!pip install -U sentence_transformers
!pip install -U accelerate
!pip install -U transformers
!pip install -U bitsandbytes
!pip install -U peft
!pip install -U trl

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [1]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
lora_config = LoraConfig(
    r=2,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj"],
    task_type="CAUSAL_LM",
)

In [7]:
compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

if compute_dtype == torch.float16:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

Your GPU supports bfloat16: accelerate training with bf16=True


In [8]:
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-it", quantization_config=bnb_config, device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
tokenizer.padding_side = "right"

In [8]:
from datasets import load_dataset

data = load_dataset("microsoft/orca-math-word-problems-200k")
def formatting_func(example):
    formatted_texts = [
        f"Question: {q}\nAnswer: {a}"
        for q, a in zip(example["question"], example["answer"])
    ]
    return {"formatted_text": formatted_texts}

data = data.map(formatting_func, batched=True)

def tokenize_data(samples):
    return tokenizer(samples["formatted_text"], padding=True, truncation=True)

data = data.map(tokenize_data, batched=True)

Map:   0%|          | 0/200035 [00:00<?, ? examples/s]

Map:   0%|          | 0/200035 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [35]:
# from datasets import load_dataset

# data = load_dataset("microsoft/orca-math-word-problems-200k")

# data["train"] = data["train"].select(range(5000))

# PREAMBLE = """As an expert problem solver solve step by step the following mathematical questions."""
# TEMPLATE = """
# Q: {question}
# A:"""

# preamble_ids = tokenizer(PREAMBLE, padding=False)["input_ids"]

# def format_data(samples):
#     input_ids_list = []

#     for q, ans in zip(samples["question"], samples["answer"]):
#         dynamic_prompt = TEMPLATE.format(question=q) + " " + ans
#         dynamic_ids = tokenizer(dynamic_prompt, padding=False, truncation=True)["input_ids"]
#         full_input_ids = preamble_ids + dynamic_ids
#         input_ids_list.append(full_input_ids)

#     return {"input_ids": input_ids_list}

# data = data.map(format_data, batched=True)

In [11]:
import transformers
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        max_steps=5000,
        save_steps=250,
        save_total_limit=10,
        learning_rate=1e-4,
        fp16=True,
        logging_steps=100,
        output_dir="/content/drive/MyDrive/lora_gold",
        optim="paged_adamw_8bit",
        report_to="none",
    ),
    tokenizer = tokenizer,
    peft_config=lora_config,
)

  trainer = SFTTrainer(


In [12]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 798720
all model parameters: 1603002624
percentage of trainable model parameters: 0.05%


In [13]:
trainer.train()

Step,Training Loss
100,0.6861
200,0.5982
300,0.6049
400,0.6036
500,0.5745
600,0.5901
700,0.5828
800,0.5776
900,0.5736
1000,0.5745


KeyboardInterrupt: 

In [10]:
output_dir = "/content/drive/MyDrive/lora_gold/checkpoint-750"

In [11]:
from peft import PeftModel

ft_tokenizer = AutoTokenizer.from_pretrained(output_dir)
ft_tokenizer.padding_side = "right"
ft_tokenizer.pad_token = ft_tokenizer.eos_token

In [12]:
base_model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-it", quantization_config=bnb_config, device_map="auto")
ft_model = PeftModel.from_pretrained(base_model, output_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
text = "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"
prompt = "Question: " + text + "\nAnswer:"

input_ids = ft_tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cuda")

outputs = ft_model.generate(input_ids=input_ids.input_ids, max_length=1024, num_return_sequences=1, pad_token_id=ft_tokenizer.eos_token_id)

response_text = ft_tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response_text)

Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
Answer: Janet eats 3 eggs for breakfast every day, so she eats 3 eggs per day.

She bakes muffins for her friends every day with 4 eggs, so she bakes 4 eggs per day.

The total number of eggs she eats and bakes is 3 + 4 = 7 eggs per day.

She sells the remainder of the eggs, which is 16 - 7 = 9 eggs per day.

She sells these eggs for $2 per egg, so she makes 9 * $2 = $18 per day at the farmers' market.

Therefore, Janet makes $18 every day at the farmers' market.


In [17]:
gsm8k = load_dataset("gsm8k", "main", cache_dir='/tmp')
gsm8k_train, gsm8k_test = gsm8k['train'], gsm8k['test']

README.md:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [19]:
%%time
for task_id, problem in enumerate(gsm8k_test):
  if task_id == 10: break

  prompt = "Question: " + problem['question'] + "\nAnswer:"
  print(f"task_id {task_id}")

  input_ids = ft_tokenizer(prompt, return_tensors='pt').to("cuda")
  outputs = ft_model.generate(input_ids=input_ids.input_ids, max_length=1024, num_return_sequences=1, pad_token_id=ft_tokenizer.eos_token_id)
  response_text = ft_tokenizer.decode(outputs[0], skip_special_tokens=True)
  print(response_text)
  print('-' * 40)
  print(response_text.strip().split("\n")[-1])
  print('=' * 40)



task_id 0
Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
Answer: Janet eats 3 eggs for breakfast every day, so she eats 3 eggs per day.

She bakes muffins for her friends every day with 4 eggs, so she bakes 4 eggs per day.

The total number of eggs she eats and bakes is 3 + 4 = 7 eggs per day.

She sells the remainder of the eggs, which is 16 - 7 = 9 eggs per day.

She sells these eggs for $2 per egg, so she makes 9 * $2 = $18 per day at the farmers' market.

Therefore, Janet makes $18 every day at the farmers' market.
----------------------------------------
Therefore, Janet makes $18 every day at the farmers' market.
task_id 1
Question: A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?
Ans