<a href="https://colab.research.google.com/github/UmarIgan/Machine-Learning/blob/master/DPO_Fine_tuning_Trendyol_Llama_2_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DPO Fine-tuning Trendyol Llama-2 Mode

Thanks to [@maximelabonne](https://twitter.com/maximelabonne) for easy to use script.

In [1]:
!pip install -q datasets trl peft bitsandbytes sentencepiece wandb accelerate

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/536.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m204.8/536.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m532.5/536.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.7/536.7 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.4/183.4 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m78.6 MB/s[0m eta [36m0:00:00[0m
[2K  

In [None]:
import os
import gc
import torch

import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import DPOTrainer
import bitsandbytes as bnb
from google.colab import userdata
import wandb

# Defined in the secrets tab in Google Colab
hf_token = userdata.get('HF_TOKEN')
wb_token = userdata.get('wandb')
wandb.login(key=wb_token)

model_name = "Trendyol/Trendyol-LLM-7b-chat-v0.1"
new_model = "Trendyol-LLM-7b-chat-v0.1-DPO"

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


## Format dataset

In [None]:
def chatml_format(example):
    # Format instruction
    message = {"role": "user", "content": example['instruction']}
    prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)

    # Format chosen answer
    chosen = example['chosen'] + "<|im_end|>\n"

    # Format rejected answer
    rejected = example['rejected'] + "<|im_end|>\n"

    return {
        "prompt": prompt,
        "chosen": chosen,
        "rejected": rejected,
    }

# Load dataset
dataset = load_dataset("umarigan/falcon_feedback_instraction_Turkish")['train']

# Save columns
original_columns = dataset.column_names

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Format dataset
dataset = dataset.map(
    chatml_format,
    remove_columns=original_columns
)

# Print sample
dataset[1]

Downloading readme:   0%|          | 0.00/678 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3139 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/2.12k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/718k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

Map:   0%|          | 0/3139 [00:00<?, ? examples/s]

{'chosen': 'Ben senin gölgesinim.<|im_end|>\n',
 'rejected': 'Ben kaçınılmaz bir ruhum, erişebildiğin bir varlık, asla çözemeyeceğin bir bilmeceyim.<|im_end|>\n',
 'prompt': '<s>[INST] Seni sürekli takip ediyorum ve her hareketini kopyalıyorum ama bana dokunamazsın ya da yakalayamazsın. [/INST]'}

In [None]:
dataset

Dataset({
    features: ['chosen', 'rejected', 'prompt'],
    num_rows: 3139
})

## Train model with DPO

In [None]:
# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['v_proj', 'q_proj']
)

# Model to fine-tune
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True
)
model.config.use_cache = False

# Reference model
ref_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True
)

config.json:   0%|          | 0.00/698 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/2.88G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=200,
    save_strategy="no",
    logging_steps=1,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    warmup_steps=100,
    fp16=True,
    report_to="wandb",
)

# Create DPO trainer
dpo_trainer = DPOTrainer(
    model,
    #ref_model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=1024,
    max_length=1536,
)

# Fine-tune model with DPO
dpo_trainer.train()

## Upload model

In [None]:
# Save artifacts
dpo_trainer.model.save_pretrained("final_checkpoint")
tokenizer.save_pretrained("final_checkpoint")

# Flush memory
del dpo_trainer, model, ref_model
gc.collect()
torch.cuda.empty_cache()

# Reload model in FP16 (instead of NF4)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    return_dict=True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Merge base model with the adapter
model = PeftModel.from_pretrained(base_model, "final_checkpoint")
model = model.merge_and_unload()

# Save model and tokenizer
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# Push them to the HF Hub
model.push_to_hub(new_model, use_temp_dir=False, token=hf_token)
tokenizer.push_to_hub(new_model, use_temp_dir=False, token=hf_token)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.78G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/718k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/umarigan/Trendyol-LLM-7b-chat-v0.1-DPO/commit/e99122afd39d3e526834bcf391c5eb4c7809e9f0', commit_message='Upload tokenizer', commit_description='', oid='e99122afd39d3e526834bcf391c5eb4c7809e9f0', pr_url=None, pr_revision=None, pr_num=None)

## Inference

In [None]:
# Format prompt
message = [
    {"role": "user", "content": "Büyük Dil modeli ne demektir"}
]
tokenizer = AutoTokenizer.from_pretrained(new_model)
prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)

# Create pipeline
pipeline = transformers.pipeline(
    "text-generation",
    model=new_model,
    tokenizer=tokenizer
)

# Generate text
sequences = pipeline(
    prompt,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    num_return_sequences=1,
    max_length=200,
)
print(sequences[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


<s>[INST] Büyük Dil modeli ne demektir [/INST]  Büyük dil modeli, büyük miktarda doğal dil verisini kullanarak insan benzeri metin oluşturma yeteneğine sahip bir yapay sinir ağıdır.


## Custom Test

In [4]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="umarigan/Trendyol-LLM-7b-chat-v0.1-DPO")

# Generate text
sequences = pipe(
    "büyük dil modellerinin finans alanındaki kullanımları nelerdir",
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    num_return_sequences=1,
    max_length=200,
)
print(sequences[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


büyük dil modellerinin finans alanındaki kullanımları nelerdir?
Çok büyük dil modelleri, özellikle de Transformer gibi, karmaşık dil görevlerinin üstesinden gelmek için tasarlanmışlardır. Bu, finansal piyasalardaki veri işleme, fiyat tahmini ve analizleri, finansal haberler ve raporlama gibi süreçleri içerir. Ayrıca, büyük dil modelleri, doğal dil işleme, metin sınıflandırma ve soru cevaplama gibi görevlerin yanı sıra, müşteri hizmetleri gibi insan etkileşimi gerektiren finansal hizmetlerde de kullanılmaktadır.
