## Fine-Tuning Gemma

### Setting up

In [None]:
# Install or upgrade the libraries this notebook uses for loading, quantizing,
# LoRA fine-tuning and experiment logging.
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb

In [None]:
import torch
import pandas as pd
import numpy as np
import warnings
import json
import time

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, PeftModel
from trl import SFTTrainer

In [None]:
from huggingface_hub import notebook_login

# Authenticate this notebook session with the Hugging Face Hub.
# NOTE(review): presumably required both to download the Gemma weights and for
# the push_to_hub calls later in the notebook — confirm.
notebook_login()

In [None]:
# Identifier of the pretrained checkpoint that is loaded and fine-tuned below.
base_model = "google/gemma-2b"
# Name passed to save_pretrained() / push_to_hub() for the merged model and tokenizer.
new_model = "fortuneGem_gemma2b"
# Identifier of the dataset passed to load_dataset().
dataset_name = "junonnong/daily_horoscope_kr"

### Loading the model and tokenizer

In [None]:
# Quantization settings handed to from_pretrained() when the base model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # load the weights in 4-bit
    bnb_4bit_quant_type="nf4",  # use the "nf4" quantization type
    bnb_4bit_compute_dtype=torch.bfloat16,  # run computations in bfloat16
    bnb_4bit_use_double_quant=True,  # enable double quantization
)

In [None]:
# Load the base checkpoint with the quantization config defined above;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
)

# Load the tokenizer that ships with the same checkpoint.
# NOTE(review): trust_remote_code=True allows code from the model repo to run —
# confirm it is actually needed for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

### Extracting the linear modules

In [None]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    """Collect the leaf names of every 4-bit linear layer in ``model``.

    Walks ``model.named_modules()``, keeps the modules that are instances of
    ``bnb.nn.Linear4bit`` and records the last dotted component of each
    qualified module name. ``lm_head`` is excluded from the result.

    Args:
        model: Object exposing ``named_modules()`` yielding ``(name, module)``.

    Returns:
        A list of the unique leaf names, in no particular order.
    """
    linear_4bit_cls = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.rsplit('.', 1)[-1]
        for qualified_name, layer in model.named_modules()
        if isinstance(layer, linear_4bit_cls)
    }
    # The output head is never returned (the original note: needed for 16 bit).
    leaf_names.discard('lm_head')
    return list(leaf_names)

# Leaf names of the quantized linear layers; reused as LoRA target_modules
# in the training cell.
modules = find_all_linear_names(model)
# Show the collected names as the cell output.
modules

### Loading the dataset

In [None]:
!pip install -U datasets

In [None]:
from datasets import load_dataset

# Load the dataset: read datasets.csv from the dataset repo, requesting split="all".
dataset = load_dataset(dataset_name, data_files='datasets.csv', split="all")

# Inspect the structure of the dataset.
print(dataset)

In [None]:
def format_instruction(example):
    """Render one dataset row as a single training prompt.

    The prompt is a ``user`` turn (the instruction, then the birthday line and
    the target-date line, each with its 천간지지 value) followed by a ``model``
    turn holding the expected output; lines are separated by ``\\n``.

    Args:
        example: Mapping with the keys ``instruction``, ``birthday``,
            ``birthday_saju``, ``Date``, ``fortune_saju`` and ``Output``.

    Returns:
        A dict with the single key ``'prompt'`` holding the rendered text.
    """
    prompt_lines = [
        "user",
        f'{example["instruction"]}',
        f'{example["birthday"]} 천간지지: {example["birthday_saju"]}',
        f'{example["Date"]} 천간지지: {example["fortune_saju"]}',
        "model",
        f'{example["Output"]}',
    ]
    return {'prompt': "\n".join(prompt_lines)}

# Run format_instruction over every row, adding its 'prompt' field to the dataset.
dataset = dataset.map(format_instruction)

# Show the dataset summary as the cell output.
dataset

In [None]:
# Tokenize the "prompt" column in batches and add the tokenizer's output to the dataset.
dataset = dataset.map(lambda samples: tokenizer(samples["prompt"]), batched=True)
# Hold out 20% of the rows for evaluation.
# NOTE(review): no seed is passed, so the split presumably differs from run to
# run — confirm whether reproducibility matters here.
dataset = dataset.train_test_split(test_size=0.2)
train_data = dataset["train"]
test_data = dataset["test"]

### Training the model

In [None]:
# Release cached GPU memory left over from earlier cells before training starts.
torch.cuda.empty_cache()

# LoRA adapter settings: rank 16 with 10% dropout, applied to the module names
# collected by find_all_linear_names().
lora_config = LoraConfig(
    r=16,
    lora_dropout=0.1,
    target_modules=modules,
    task_type="CAUSAL_LM"
)

# Do not wrap the model with get_peft_model() here as well: SFTTrainer attaches
# the adapter itself when it receives `peft_config`, so the LoRA config has a
# single source of truth and the model is only wrapped once.
# NOTE(review): bnb_config computes in bfloat16 while fp16=True is used for
# mixed precision — confirm this combination is intended for the target GPU.
# NOTE(review): newer TRL releases expect `dataset_text_field` on SFTConfig
# rather than on SFTTrainer — confirm against the installed TRL version.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=test_data,
    dataset_text_field="prompt",
    peft_config=lora_config,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # 2 x 4 = 8 samples per optimizer step per device
        warmup_steps=100,
        learning_rate=2e-5,
        fp16=True,
        logging_steps=100,
        output_dir="outputs",
    )
)

trainer.train()

In [None]:
def get_completion(query: str, model, tokenizer):
    """Generate a completion for ``query`` using the training prompt layout.

    The prompt is built as ``user\\n<query>\\nmodel\\n`` so it lines up with the
    text produced by ``format_instruction``; surrounding whitespace of
    ``query`` is stripped so no stray blank line or indentation ends up
    between the turns.

    Args:
        query: The user request (instruction plus any context lines).
        model: Model exposing ``generate(**inputs, max_new_tokens=...)``.
        tokenizer: Callable tokenizer that also provides ``decode``.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens skipped.
    """
    prompt = f"user\n{query.strip()}\nmodel\n"

    # Follow the model's own device instead of assuming a fixed GPU index;
    # fall back to the previous default when the model does not expose one.
    device = getattr(model, "device", "cuda:0")
    model_inputs = tokenizer(prompt, return_tensors="pt").to(device)

    generated_ids = model.generate(**model_inputs, max_new_tokens=500)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [None]:
# Sample request: the instruction line followed by the birthday line and the
# target-date line, each carrying its 천간지지 value.
query="""1997년10월29일생의 천간지지와 2024년9월27일의 천간지지를 토대로 해당 날짜의 운세를 10줄 이하로만 알려줘. 다른 설명은 필요 없어.
1997-10-29 천간지지: 정축년 경술월 갑진일
2024-09-27 천간지지: 갑진년 계유월 갑오일
"""

# Generate with the model held by the trainer and print the decoded text.
result = get_completion(query=query,
                        model=trainer.model,
                        tokenizer=tokenizer)
print(result)

### Merging the base model with the adapter

In [None]:
# Save the trained model held by the trainer (the LoRA adapter) to a local directory.
trainer.model.save_pretrained("new_adapter_model")

# Reload the base checkpoint in float16, without the 4-bit quantization config,
# so the adapter can be merged into it.
# The reloaded model gets its own name: `base_model` keeps holding the
# checkpoint id string, so this cell can be re-run and later cells can still
# use the id.
merge_base = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)

# Attach the saved adapter to the reloaded base model ...
model = PeftModel.from_pretrained(merge_base, "new_adapter_model")

# ... then merge the adapter into the base model and drop the adapter wrapper.
model = model.merge_and_unload()

### Saving the model

In [None]:
# Save each artifact under `new_model` locally and upload it to the Hub under
# the same name: first the merged model, then the tokenizer.
for artifact in (model, tokenizer):
    artifact.save_pretrained(new_model)
    artifact.push_to_hub(new_model, use_temp_dir=False)

### Convert model to MediaPipe format for on-device deployment

In [None]:
!pip install mediapipe
!pip install torch

In [None]:
from mediapipe.tasks.python.genai import converter

# Conversion settings for the saved fine-tuned checkpoint.
config = converter.ConversionConfig(
    # What to convert.
    model_type="GEMMA_2B",
    ckpt_format="safetensors",
    input_ckpt="/content/fortuneGem_gemma2b",
    vocab_model_file="/content/fortuneGem_gemma2b",
    # How to convert.
    backend="gpu",
    combine_file_only=False,
    # Where the results go.
    output_dir="/content/fortuneGem_gemma2b/intermediate/fine_tuned_fortune_gemma2b",
    output_tflite_file="/content/fortuneGem_gemma2b/fine_tuned_fortune_gemma2b/fortunegem.bin",
)

# Run the conversion, then report completion.
converter.convert_checkpoint(config)

print("Model converted successfully.")