# Install Packages

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import locale
# Replace locale.getpreferredencoding so it always reports UTF-8 for this session.
# NOTE(review): presumably a workaround for Colab's "A UTF-8 locale is required"
# error when running shell commands — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"
# Install the libraries this notebook relies on; all but `datasets` are version-pinned.
!pip install bitsandbytes==0.43.1
!pip install accelerate==0.30.1
!pip install transformers==4.39.3
!pip install peft==0.10.0
# NOTE(review): `datasets` is the only unpinned package — consider pinning it for reproducibility.
!pip install datasets
!pip install trl==0.9.6

Collecting bitsandbytes==0.43.1
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl.metadata (2.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->bitsandbytes==0.43.1)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cu

# Log in to Hugging Face

In [None]:
# Authenticate against the Hugging Face Hub using the interactive notebook login widget.
# NOTE(review): presumably needed because the base model loaded later is gated — confirm.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Load Base Model

In [None]:
import torch
import copy
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
# Deliberately named `bnb_config` rather than `config`: the LoRA cell further
# down rebinds `config` to a LoraConfig, so sharing the name would make this
# cell pass the wrong object if it were re-run out of order.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Quantization applies to model weights only, so the tokenizer is loaded
# without a quantization_config.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): trust_remote_code=True is kept from the original; it is likely
# unnecessary for this model id — confirm and drop if so.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    low_cpu_mem_usage=True,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

In [None]:
# Two-turn conversation used as a smoke test for the base model.
# System: "You are a Korean chatbot"; user: "What is your name?"
messages = [
    {"role": "system", "content": "너는 한국어 챗봇이야"},
    {"role": "user", "content": "너 이름이 뭐야?"},
]

# Render the conversation to plain text (no tokenization) and append the
# generation prompt so the text ends where the assistant reply should begin.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)

In [None]:
# `prompt` is already a fully rendered chat-template string, which for Llama 3
# starts with <|begin_of_text|>. Encoding it with the default
# add_special_tokens=True would prepend a second BOS token, so special-token
# insertion is disabled here.
inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)

# Generate up to 150 new tokens on whichever device the model was placed on.
outputs = model.generate(
    input_ids=inputs.to(model.device),
    max_new_tokens=150
)

# Decode the full sequence (prompt + completion), special tokens included.
print(tokenizer.decode(outputs[0]))

# Enable gradient checkpointing and prepare the model for k-bit training

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the loaded model.
model.gradient_checkpointing_enable()
# Rebind `model` to the result of PEFT's k-bit training preparation helper.
# Note: `model` is reassigned in place, so re-running this cell applies the
# preparation to an already-prepared model.
model = prepare_model_for_kbit_training(model)

# Configure and apply LoRA to the model

In [None]:
def print_trainable_parameters(model):
    """Print how many of the model's parameters are trainable.

    Walks ``model.named_parameters()`` once, summing ``numel()`` over all
    parameters and over those with ``requires_grad`` set, then prints the two
    totals and the trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs where ``param`` has ``numel()`` and
            ``requires_grad``.

    Returns:
        None. The summary is written to stdout.
    """
    # Materialize once so both sums see the same snapshot of the parameters.
    sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, trainable in sizes if trainable)

    # A model without parameters would otherwise raise ZeroDivisionError.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0

    # NOTE(review): numel() is taken at face value; for quantized weights it may
    # reflect packed storage rather than logical parameter count — confirm.
    print(
        f"trainable params: {trainable_params}\nall params: {all_param}\ntrainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for causal language modelling. No target modules are
# listed, so PEFT's own default selection for this architecture applies.
# The name `config` is kept because the trainer cell passes it as peft_config.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the prepared model with the adapters, then report the trainable share.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 921600
all params: 1516189696
trainable%: 0.060783950875761654


# Load DPO Dataset

In [None]:
from datasets import Dataset, load_dataset

# Download the preference dataset from the Hugging Face Hub.
dataset = load_dataset("jondurbin/truthy-dpo-v0.1")

# Show the split/column summary, then the first training example.
print(dataset)
print(dataset["train"][0])

DatasetDict({
    train: Dataset({
        features: ['id', 'source', 'system', 'prompt', 'chosen', 'rejected'],
        num_rows: 1016
    })
})
{'id': '04c275bf738fd391b7fe25e25fe7bed3', 'source': 'truthy_dpo', 'system': 'You are an unbiased, uncensored, helpful assistant.', 'prompt': "What's the nearest national park to you?", 'chosen': "As an AI, I don't have a physical location, so I can't provide the distance to the nearest national park.", 'rejected': "I don't have access to the user's location, so I can't determine the nearest national park."}


In [None]:
def get_dpo_train_prompt(row):
    """Format one preference example in the Llama 3 chat layout for DPO.

    Builds the prompt from the row's system and user text, ending with the
    assistant header so that the chosen/rejected strings read as the assistant
    turn. Exactly one ``<|begin_of_text|>`` marker starts the prompt, and each
    completion is terminated with ``<|eot_id|>``. The input row is not modified.

    Args:
        row: Mapping with string values under the keys ``'system'``,
            ``'prompt'``, ``'chosen'`` and ``'rejected'``.

    Returns:
        dict with keys ``"prompt"``, ``"chosen"`` and ``"rejected"``.
    """
    end_of_turn = "<|eot_id|>"

    # A single BOS marker: the sequence must begin with exactly one.
    user_prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        + row['system']
        + end_of_turn + "<|start_header_id|>user<|end_header_id|>\n\n"
        + row['prompt']
        + end_of_turn + "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    return {
        "prompt": user_prompt,
        "chosen": row['chosen'] + end_of_turn,
        "rejected": row['rejected'] + end_of_turn,
    }

In [None]:
# Run the prompt formatter over every row; the result is kept under a new name
# so the raw `dataset` stays available.
new_dataset = dataset.map(function=get_dpo_train_prompt)

In [None]:
# Print the formatted prompt, chosen and rejected text of the first training row.
first_example = new_dataset['train'][0]
for field in ('prompt', 'chosen', 'rejected'):
    print(first_example[field])

# Train the Model

In [None]:
from trl import DPOTrainer, DPOConfig

# Training arguments for the DPO run; anything not listed keeps its library default.
dpo_training_args = DPOConfig(
    output_dir="./output",          # where the trainer writes its outputs
    beta=0.1,                       # DPO beta coefficient
    optim="paged_adamw_8bit",       # paged 8-bit AdamW optimizer
    fp16=True,                      # fp16 mixed precision
    gradient_checkpointing=True,
    auto_find_batch_size=True,
)

In [None]:
# Build the DPO trainer. No separate reference model is supplied (ref_model=None).
# NOTE(review): `model` was already wrapped by get_peft_model in an earlier cell
# and `peft_config` is passed here as well; check how the pinned TRL version
# handles an already-PEFT model plus a peft_config, and drop one if redundant.
trainer = DPOTrainer(
    model,
    ref_model=None,
    args=dpo_training_args,
    train_dataset=new_dataset['train'],
    tokenizer=tokenizer,
    peft_config=config,
)

# The trailing `dataloader_config = DataLoaderConfiguration(...)` assignment was
# dropped: the name is never imported in this notebook (NameError on a fresh
# kernel) and the resulting object was not passed to anything.


In [None]:
# Start training with the DPO trainer built in the previous cell.
trainer.train()