# Fine-tuning Google's Gemma Model

In [1]:
# !pip3 install -q -U bitsandbytes==0.42.0
# !pip3 install -q -U peft==0.8.2
# !pip3 install -q -U trl==0.7.10
# !pip3 install -q -U accelerate==0.27.1
# !pip3 install -q -U datasets==2.17.0
# !pip3 install -q -U transformers==4.38.0

In [2]:
import os
import transformers
import torch
# from google.colab import userdata
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# SECURITY: a Hugging Face access token used to be hardcoded on this line.
# Treat that token as compromised and revoke it in the account settings.
# The token is now expected in the HF_TOKEN environment variable, set outside
# the notebook; when it is missing, ask for it without echoing the input so it
# never ends up in the saved notebook.
if not os.environ.get("HF_TOKEN"):
    from getpass import getpass

    os.environ["HF_TOKEN"] = getpass("Hugging Face access token: ").strip()

### Prerequisites
* NF4 (4-bit NormalFloat) quantization: https://www.kaggle.com/code/lorentzyeung/what-s-4-bit-quantization-how-does-it-help-llama2


In [4]:
# Hub identifier of the base checkpoint that gets fine-tuned.
model_id = "google/gemma-2b"

# Quantisation settings handed to `from_pretrained`: 4-bit loading with the
# NF4 quantisation type and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [5]:
# Tokenizer and model both come from `model_id`; the access token is read
# from the HF_TOKEN environment variable for each download.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=os.environ['HF_TOKEN'],
)

# The empty-string key in `device_map` maps the whole model to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=os.environ['HF_TOKEN'],
    device_map={"": 0},
    quantization_config=bnb_config,
)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/31/b7/31b74c3cc903ef02c515aa6049fe011e594e8ee2ed47e51e228020eacc6f98df/98caad07f1e1d17fede7734906d6881d4cb231e1d0cd2ebcf7fc8a6317eee4d6?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model-00001-of-00002.safetensors%3B+filename%3D%22model-00001-of-00002.safetensors%22%3B&Expires=1717727128&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxNzcyNzEyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzMxL2I3LzMxYjc0YzNjYzkwM2VmMDJjNTE1YWE2MDQ5ZmUwMTFlNTk0ZThlZTJlZDQ3ZTUxZTIyODAyMGVhY2M2Zjk4ZGYvOThjYWFkMDdmMWUxZDE3ZmVkZTc3MzQ5MDZkNjg4MWQ0Y2IyMzFlMWQwY2QyZWJjZjdmYzhhNjMxN2VlZTRkNj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=JrX2mVDjWn84WtICKqfXymAFE%7EUR46ldVvMRdH57OC5XetSIXnOhOf-7XLg9RZm-fYd9XgNJQAYJJA7KcvMNGWbsZU5ZjTQDygIRAJBpSPQ9GeQcG37qttAionHBX77tWaDpEBa65oBqbHgPQT

ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Max retries exceeded with url: /repos/31/b7/31b74c3cc903ef02c515aa6049fe011e594e8ee2ed47e51e228020eacc6f98df/98caad07f1e1d17fede7734906d6881d4cb231e1d0cd2ebcf7fc8a6317eee4d6?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model-00001-of-00002.safetensors%3B+filename%3D%22model-00001-of-00002.safetensors%22%3B&Expires=1717727128&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxNzcyNzEyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzMxL2I3LzMxYjc0YzNjYzkwM2VmMDJjNTE1YWE2MDQ5ZmUwMTFlNTk0ZThlZTJlZDQ3ZTUxZTIyODAyMGVhY2M2Zjk4ZGYvOThjYWFkMDdmMWUxZDE3ZmVkZTc3MzQ5MDZkNjg4MWQ0Y2IyMzFlMWQwY2QyZWJjZjdmYzhhNjMxN2VlZTRkNj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=JrX2mVDjWn84WtICKqfXymAFE~UR46ldVvMRdH57OC5XetSIXnOhOf-7XLg9RZm-fYd9XgNJQAYJJA7KcvMNGWbsZU5ZjTQDygIRAJBpSPQ9GeQcG37qttAionHBX77tWaDpEBa65oBqbHgPQTvjf3SfKMqwbHJGbuBQaW50sQtnVDhLzqkbv1z0FRHgubVRSH2H6CTNIP8P6SnHMnV9s8o1MCoN5nSi1mqcw9TA-4xAHZW2HWr-BO~P5deDPXEUOdQfu11UkNhLUdJHd3HBfPI5zrHNwpUA2-wAqAL-xgkcjmVA3BofxQ96JSjq8mfHPddXhYSHOp8dro5ILK01Vw__&Key-Pair-Id=KCD77M1F0VK2B (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001FCA92D2E00>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))"), '(Request ID: a1591164-538d-48bb-94cd-68008004f197)')

In [None]:
device = "cuda:0"
text = "Quote: Imagination is more,"

# Tokenize the prompt into PyTorch tensors and move them to the target device.
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens after the prompt.
outputs = model.generate(**inputs, max_new_tokens=20)

# Decode the first returned sequence, dropping special tokens, and show it.
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

In [None]:
device = "cuda:0"
text = "Quote: Imagination is more"

# Tokenize the prompt into PyTorch tensors and move them to the target device.
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens after the prompt.
outputs = model.generate(**inputs, max_new_tokens=20)

# Decode the first returned sequence, dropping special tokens, and show it.
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

In [None]:
# Turn off Weights & Biases reporting for the training run below.
# The previous value "false" is not treated as a true value, so it left W&B
# reporting enabled; the variable only has an effect when set to a true value.
os.environ["WANDB_DISABLED"] = "true"

In [None]:
# Names of the sub-modules that LoRA adapters are attached to.
lora_target_modules = [
    "q_proj",
    "o_proj",
    "k_proj",
    "v_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Rank-8 LoRA configuration for causal language modelling.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=8,
)

In [None]:
from datasets import load_dataset

# Download the quotes dataset and, in batched mode, add the tokenizer's
# output for the "quote" column to every split.
data = load_dataset("Abirate/english_quotes").map(
    lambda batch: tokenizer(batch["quote"]),
    batched=True,
)

In [None]:
# Preview only the first few quotes; displaying the whole column would dump
# every string of the split into the notebook output.
data['train']['quote'][:5]

In [None]:
def formatting_func(example):
    """Build the training text(s) for SFTTrainer from quote/author fields.

    SFTTrainer invokes this callback on *batches*: ``example['quote']`` and
    ``example['author']`` are then lists with one entry per row. The previous
    implementation formatted only index ``[0]``, so every batch contributed a
    single training text and all remaining rows were silently dropped.

    Args:
        example: Mapping with ``'quote'`` and ``'author'`` keys. Each value is
            either a list of strings (batched call) or a single string
            (one example).

    Returns:
        A list of strings, one ``"Quote: ...\\nAuthor: ..."`` text per row, in
        input order. An empty batch yields an empty list.
    """
    quotes = example['quote']
    authors = example['author']

    # A single (non-batched) example carries plain strings; wrap them so the
    # same loop handles both shapes instead of indexing into the characters.
    if isinstance(quotes, str):
        quotes, authors = [quotes], [authors]

    return [
        f"Quote: {quote}\nAuthor: {author}"
        for quote, author in zip(quotes, authors)
    ]

In [None]:
# Display the train split object as this cell's output.
data['train']

In [None]:
# Optimisation settings, built separately from the trainer wiring.
# NOTE(review): fp16 mixed precision is requested here while the quantisation
# config earlier in this notebook uses a bfloat16 compute dtype - confirm the
# mismatch is intended for the target GPU.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    max_steps=100,
    warmup_steps=2,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=1,
)

# Supervised fine-tuning on the train split: the adapter settings come from
# `lora_config` and the training texts are produced by `formatting_func`.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    formatting_func=formatting_func,
    peft_config=lora_config,
)

In [None]:
# Run training with the trainer built in the previous cell; the call's return
# value is shown as this cell's output.
trainer.train()

In [None]:
device = "cuda:0"
text = "Quote: A woman is like a tea bag;"

# Tokenize the prompt into PyTorch tensors and move them to the target device.
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens after the prompt.
outputs = model.generate(**inputs, max_new_tokens=20)

# Decode the first returned sequence, dropping special tokens, and show it.
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

In [None]:
device = "cuda:0"
text = "Quote: Outside of a dog, a book is man's"

# Tokenize the prompt into PyTorch tensors and move them to the target device.
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 20 new tokens after the prompt.
outputs = model.generate(**inputs, max_new_tokens=20)

# Decode the first returned sequence, dropping special tokens, and show it.
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

Quote: Outside of a dog, a book is man's best friend.
Author: Nicolas Chamfort
Quote: The most wasted of all days is one
