# Install Packages

In [None]:
# Override the preferred-encoding lookup so it always reports UTF-8 before the
# shell commands below run.
# NOTE(review): presumably a workaround for the "UTF-8 locale is required"
# error that `!` commands can raise on Colab — confirm it is still needed.
import locale
locale.getpreferredencoding = lambda: "UTF-8"
# Install the libraries used by the rest of the notebook. Every package is
# pinned except `datasets`, which resolves to whatever version is current.
!pip install bitsandbytes==0.43.1
!pip install accelerate==0.30.1
!pip install transformers==4.39.3
!pip install peft==0.10.0
!pip install datasets
!pip install trl==0.9.6

Collecting bitsandbytes==0.43.1
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl.metadata (2.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cu

# Log in to Hugging Face

In [None]:
# Authenticate with the Hugging Face Hub; notebook_login() displays an
# interactive widget in the cell output.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Load Base Model

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model_id = "gg-hf/gemma-2b-it"

# Quantization applies to model weights only, so the tokenizer is loaded
# without a quantization_config.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Gemma is implemented inside transformers itself, so trust_remote_code is not
# needed; leaving it off avoids executing arbitrary code from the Hub repo.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="cuda:0",
    quantization_config=config,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [None]:
# Single-turn conversation used to sanity-check the base model.
messages = [
    { "role": "user", "content": "너 이름이 뭐야?"},
]
# Render the conversation to a plain string (tokenize=False) and append the
# opening of the model turn (add_generation_prompt=True) so the model answers.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
print(prompt)

<bos><start_of_turn>user
너 이름이 뭐야?<end_of_turn>
<start_of_turn>model



In [None]:
# `prompt` was rendered by the chat template and already begins with <bos>.
# Encoding with the tokenizer's default special tokens would prepend a second
# <bos>, so they are disabled here.
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
outputs = model.generate(
    input_ids=inputs.to(model.device),
    max_new_tokens=150
)
# Decode the full sequence (prompt + completion), special tokens included.
print(tokenizer.decode(outputs[0]))

<bos><bos><start_of_turn>user
너 이름이 뭐야?<end_of_turn>
<start_of_turn>model
나는 사람의 이름이 없습니다. 저는 AI 모델이므로, 개인적인 이름을 가지고 있습니다.<eos>


# Enable gradient checkpointing and prepare the model for k-bit training

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing, then let PEFT prepare the quantized model
# for training. `model` is rebound to the prepared model.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# Configure and apply LoRA to the model

In [None]:
def print_trainable_parameters(model):
    """Print the trainable and total parameter counts of ``model``.

    Iterates ``model.named_parameters()`` once, counting the elements of every
    parameter and, separately, of those whose ``requires_grad`` is truthy.
    Prints both counts and the trainable share as a percentage.
    """
    # (element count, trainable flag) for each parameter, gathered in one pass.
    counts = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]

    all_param = sum(n for n, _ in counts)
    trainable_params = sum(n for n, needs_grad in counts if needs_grad)

    print(
        f"trainable params: {trainable_params}\nall params: {all_param}\ntrainable%: {100 * trainable_params / all_param}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters for causal-LM fine-tuning. No target_modules are given,
# so PEFT chooses which layers receive adapters.
# Note: this rebinds `config`, which previously held the BitsAndBytesConfig.
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the model with LoRA adapters and report how many parameters will train.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 921600
all params: 1516189696
trainable%: 0.060783950875761654


# Load DPO Dataset

In [None]:
from datasets import Dataset, load_dataset

# Load the preference dataset from the Hugging Face Hub, then show its
# structure and the first training row.
dataset = load_dataset("jondurbin/truthy-dpo-v0.1")
print(dataset)
print(dataset['train'][0])

DatasetDict({
    train: Dataset({
        features: ['id', 'source', 'system', 'prompt', 'chosen', 'rejected'],
        num_rows: 1016
    })
})
{'id': '04c275bf738fd391b7fe25e25fe7bed3', 'source': 'truthy_dpo', 'system': 'You are an unbiased, uncensored, helpful assistant.', 'prompt': "What's the nearest national park to you?", 'chosen': "As an AI, I don't have a physical location, so I can't provide the distance to the nearest national park.", 'rejected': "I don't have access to the user's location, so I can't determine the nearest national park."}


In [None]:
def get_dpo_train_prompt(row):
    """Format one preference row into Gemma chat-style DPO fields.

    The system text (when present) and the user question are joined with a
    newline and wrapped in a single user turn, followed by the opening of the
    model turn. The prompt starts with exactly one ``<bos>``, matching what the
    tokenizer's chat template renders.

    Args:
        row: Mapping with the keys 'system', 'prompt', 'chosen' and 'rejected'.
            'system' may be None or empty, in which case it is omitted.

    Returns:
        dict with:
            'prompt': the formatted user turn plus the model-turn opener,
            'chosen': the preferred answer followed by '<eos>',
            'rejected': the dispreferred answer followed by '<eos>'.
    """
    system = row['system']
    question = row['prompt']
    # Skip the system line entirely when there is none, rather than failing on
    # None or leaving a stray leading newline.
    user_turn = system + '\n' + question if system else question

    user_prompt = (
        "<bos><start_of_turn>user\n"
        + user_turn
        + "<end_of_turn>\n<start_of_turn>model\n"
    )

    return {
        "prompt": user_prompt,
        "chosen": row['chosen'] + '<eos>',
        "rejected": row['rejected'] + '<eos>',
    }

In [None]:
# Apply the DPO formatting to every row; the returned 'prompt', 'chosen' and
# 'rejected' values replace the columns of the same name.
new_dataset = dataset.map(get_dpo_train_prompt)

In [None]:
# Show the three formatted DPO fields of the first training example.
for _field in ('prompt', 'chosen', 'rejected'):
    print(new_dataset['train'][0][_field])

<bos><bos><start_of_turn>user
You are an unbiased, uncensored, helpful assistant.
What's the nearest national park to you?<end_of_turn>
<start_of_turn>model

As an AI, I don't have a physical location, so I can't provide the distance to the nearest national park.<eos>

I don't have access to the user's location, so I can't determine the nearest national park.<eos>


# Train Model

In [None]:
from trl import DPOTrainer, DPOConfig

# Training arguments for DPO; checkpoints and logs go to ./output.
# NOTE(review): fp16=True is set here while the quantization config uses
# bfloat16 as its compute dtype — confirm this mix is intended for the target GPU.
dpo_training_args = DPOConfig(
    output_dir="./output",
    beta=0.1,
    auto_find_batch_size=True,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    fp16=True,
)

In [None]:
# `model` has already been wrapped with LoRA adapters via get_peft_model, so no
# peft_config is passed here: giving DPOTrainer both a PeftModel and a
# peft_config makes it merge/unload the existing adapter and wrap the model again.
# With ref_model=None and a PEFT model, the trainer uses the same model with the
# adapter disabled as the reference policy.
trainer = DPOTrainer(
    model,
    ref_model=None,
    args=dpo_training_args,
    train_dataset=new_dataset['train'],
    tokenizer=tokenizer,
)
# The former `dataloader_config = DataLoaderConfiguration(...)` line was removed:
# the name was never imported (NameError) and the value was never used.


In [None]:
# Run DPO fine-tuning with the trainer configured above.
trainer.train()