In [None]:
!pip install trl
!pip install bitsandbytes
!pip install llm-blender
!git clone https://github.com/arcee-ai/mergekit.git
%cd mergekit
!pip install -e .

In [None]:
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset, concatenate_datasets,load_dataset
from peft import LoraConfig, TaskType, PeftModel
from trl import DPOConfig, DPOTrainer
from tqdm import tqdm
import torch
import pandas as pd
import os
import json
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import numpy as np
import random

In [None]:
# Mount Google Drive; the checkpoint, model, log and plot directories configured
# below all live under /content/drive.
drive.mount('/content/drive')

In [None]:
# Output locations on the mounted Drive: one directory per artefact type
# (checkpoints, saved adapters, logs, plots) and per training phase.
_PROJECT_ROOT = "/content/drive/MyDrive/KO_Project_2"

CHECKPOINTS_PHASE_1_BASE_DIR = f"{_PROJECT_ROOT}/checkpoints_phase_1"
CHECKPOINTS_PHASE_2_BASE_DIR = f"{_PROJECT_ROOT}/checkpoints_phase_2"

MODELS_PHASE_1_SAVE_BASE_DIR = f"{_PROJECT_ROOT}/models_phase_1"
MODELS_PHASE_2_SAVE_BASE_DIR = f"{_PROJECT_ROOT}/models_phase_2"

LOGS_PHASE_1_BASE_DIR = f"{_PROJECT_ROOT}/logs_phase_1"
LOGS_PHASE_2_BASE_DIR = f"{_PROJECT_ROOT}/logs_phase_2"

PLOTS_PHASE_1_BASE_DIR = f"{_PROJECT_ROOT}/plots_phase_1"
PLOTS_PHASE_2_BASE_DIR = f"{_PROJECT_ROOT}/plots_phase_2"

In [None]:
# Load the preference dataset; its "train" split is filtered and subsampled below.
DATASET = load_dataset("argilla/ultrafeedback-binarized-preferences-cleaned")

In [None]:
# Base checkpoint shared by every model in this notebook, and its tokenizer.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

TOKENIZER = AutoTokenizer.from_pretrained(MODEL_ID)
# Pad on the left (the batched generation helper configures its tokenizer the same way).
TOKENIZER.padding_side = "left"
# Fall back to the EOS token for padding when the checkpoint defines no pad token.
if TOKENIZER.pad_token is None:
    TOKENIZER.pad_token = TOKENIZER.eos_token

In [None]:
def helper_filter_dpo_dataset(example, max_prompt_tokens=256, max_total_tokens=512):
    """Decide whether a preference pair is short enough to be kept.

    The prompt is taken to be every message of ``example['chosen']`` except the
    last one. Lengths are measured in tokens of the chat-templated text produced
    by the module-level ``TOKENIZER``.

    Args:
        example: Dataset row with ``chosen`` and ``rejected`` message lists.
        max_prompt_tokens: Largest allowed token count for the prompt alone.
        max_total_tokens: Largest allowed token count for prompt + chosen
            answer and for the full rejected conversation.

    Returns:
        ``True`` when the prompt, the chosen conversation and the rejected
        conversation are all within their limits, otherwise ``False``.
    """
    def _token_count(messages):
        # return_dict=False is explicit so the call yields a flat list of token
        # ids; len() of a dict-like encoding would count its keys instead.
        return len(TOKENIZER.apply_chat_template(messages, tokenize=True, return_dict=False))

    chosen_messages = example['chosen']

    # Check the cheapest condition first and stop at the first violation so that
    # rejected rows are not tokenized more often than necessary.
    if _token_count(chosen_messages[:-1]) > max_prompt_tokens:
        return False
    if _token_count(chosen_messages) > max_total_tokens:
        return False
    return _token_count(example['rejected']) <= max_total_tokens

In [None]:
def helper_apply_filtering(dataset, name="Dataset"):
    """Filter ``dataset`` with ``helper_filter_dpo_dataset`` and report the effect.

    Args:
        dataset: Object exposing ``filter(function, num_proc=...)`` and ``len()``.
        name: Label printed in the summary header.

    Returns:
        The filtered dataset returned by ``dataset.filter``.
    """
    original_count = len(dataset)

    filtered_dataset = dataset.filter(helper_filter_dpo_dataset, num_proc=os.cpu_count())

    new_count = len(filtered_dataset)
    removed_count = original_count - new_count
    # An empty input has no meaningful ratio; report 0% instead of dividing by zero.
    removed_ratio = removed_count / original_count if original_count else 0.0

    print(f"--- {name} ---")
    print(f"Orijinal: {original_count}")
    print(f"Kalan   : {new_count}")
    print(f"Atılan  : {removed_count} (Oran: {removed_ratio:.2%})\n")

    return filtered_dataset

In [None]:
# Keep only the length-filtered "train" split.
# NOTE(review): this rebinds DATASET from the loaded dataset to the filtered split,
# so re-running this cell without re-running the loading cell would presumably
# fail on DATASET["train"] — confirm.
DATASET = helper_apply_filtering(DATASET["train"], name="Dataset")

In [None]:
# Values of the "source" column used to carve the dataset into subsets.
DATASET_SELECTED_SUBSETS_LIST = ["sharegpt", "evol_instruct", "ultrachat", "flan_v2_niv2"]

In [None]:
def helper_get_dataset_subset(subset_name, subset_size):
    """Pick up to ``subset_size`` shuffled rows of ``DATASET`` from one source.

    Rows whose ``source`` field equals ``subset_name`` are shuffled with seed 42
    and the first ``subset_size`` of them are returned. When fewer rows exist,
    all of them are returned and the shortfall is reported.
    """
    matching_rows = DATASET.filter(lambda row: row["source"] == subset_name).shuffle(42)
    has_enough = len(matching_rows) >= subset_size

    take = subset_size if has_enough else len(matching_rows)
    picked = matching_rows.select(range(take))

    if has_enough:
        print(f"{subset_name} alt kümesi için {subset_size} adet veri seçildi")
    else:
        print(f"{subset_name} alt kümesi için {len(picked)} adet veri seçildi. {subset_size} kadar veri yok.")
    return picked

In [None]:
def helper_get_subset_train_test_validation(subset_ds, subset_name):
    """Split one subset into train, validation and test parts.

    20% of ``subset_ds`` is held out first (seed 42); that held-out part is then
    split in half (seed 42) into validation and test. The sizes are printed.

    Returns:
        ``(train, validation, test)``.
    """
    outer_split = subset_ds.train_test_split(test_size=0.2, seed=42, shuffle=True)
    # The held-out 20% is divided evenly between validation and test.
    holdout_split = outer_split["test"].train_test_split(test_size=0.5, seed=42, shuffle=True)

    train, validation, test = outer_split['train'], holdout_split['train'], holdout_split['test']

    print(f"---{subset_name} alt kümesi için---")
    print(f"Train sayısı     : {len(train)}")
    print(f"Validation sayısı: {len(validation)}")
    print(f"Test sayısı      : {len(test)}\n")

    return train, validation, test

In [None]:
# Up to 2500 examples per selected source, in the order of DATASET_SELECTED_SUBSETS_LIST.
SUBSET_DATASETS_LIST = [helper_get_dataset_subset(subset_name, 2500) for subset_name in DATASET_SELECTED_SUBSETS_LIST]

In [None]:
# Split every subset into train / validation / test. SUBSET_DATASETS_LIST holds
# the subsets in this order: sharegpt, evol_instruct, ultrachat, flan_v2_niv2.
(
    (SUBSET_1_TRAIN, SUBSET_1_VAL, SUBSET_1_TEST),
    (SUBSET_2_TRAIN, SUBSET_2_VAL, SUBSET_2_TEST),
    (SUBSET_3_TRAIN, SUBSET_3_VAL, SUBSET_3_TEST),
    (SUBSET_4_TRAIN, SUBSET_4_VAL, SUBSET_4_TEST),
) = [
    helper_get_subset_train_test_validation(SUBSET_DATASETS_LIST[position], label)
    for position, label in enumerate(("sharegpt", "evol_instruct", "ultrachat", "flan_v2_niv2"))
]

In [None]:
# Pool the per-subset splits into one train / validation / test set each,
# shuffled with seed 42.
TOTAL_TRAIN, TOTAL_VAL, TOTAL_TEST = (
    concatenate_datasets(list(parts)).shuffle(42)
    for parts in (
        (SUBSET_1_TRAIN, SUBSET_2_TRAIN, SUBSET_3_TRAIN, SUBSET_4_TRAIN),
        (SUBSET_1_VAL, SUBSET_2_VAL, SUBSET_3_VAL, SUBSET_4_VAL),
        (SUBSET_1_TEST, SUBSET_2_TEST, SUBSET_3_TEST, SUBSET_4_TEST),
    )
)

In [None]:
def helper_prepare_curriculum_subset_data(subset_train, split_ratio=0.5):
    """Split a training set into an "easy" and a "hard" part for curriculum learning.

    Every row gets a ``margin`` column (``chosen-rating`` minus
    ``rejected-rating``). Rows are sorted by margin in descending order; the
    first ``split_ratio`` share (largest margins) is the easy part used in
    Phase 1, the remainder is the hard part used in Phase 2.

    Args:
        subset_train: Dataset exposing ``map`` and ``to_pandas`` whose rows have
            ``chosen-rating`` and ``rejected-rating`` fields.
        split_ratio: Share of rows assigned to the easy part, within [0, 1].

    Returns:
        ``(easy_dataset, hard_dataset)``.

    Raises:
        ValueError: If ``split_ratio`` lies outside [0, 1].
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be within [0, 1], got {split_ratio!r}")

    with_margin = subset_train.map(lambda x: {"margin": x["chosen-rating"] - x["rejected-rating"]})
    ranked_df = with_margin.to_pandas().sort_values(by="margin", ascending=False)

    split_index = int(len(ranked_df) * split_ratio)
    easy_df = ranked_df.iloc[:split_index]
    hard_df = ranked_df.iloc[split_index:]

    print(f"Kolay Örnek Sayısı (Phase 1): {len(easy_df)} (Ort. Margin: {easy_df['margin'].mean():.2f})")
    print(f"Zor Örnek Sayısı (Phase 2): {len(hard_df)} (Ort. Margin: {hard_df['margin'].mean():.2f})\n")

    # After sorting, the frames carry a permuted integer index. preserve_index=False
    # keeps that index from being stored as an extra column in the datasets.
    subset_train_easy = Dataset.from_pandas(easy_df, preserve_index=False)
    subset_train_hard = Dataset.from_pandas(hard_df, preserve_index=False)

    return subset_train_easy, subset_train_hard

In [None]:
# Easy/hard halves of every per-subset training set and of the pooled training set.
(
    (SUBSET_1_TRAIN_EASY, SUBSET_1_TRAIN_HARD),
    (SUBSET_2_TRAIN_EASY, SUBSET_2_TRAIN_HARD),
    (SUBSET_3_TRAIN_EASY, SUBSET_3_TRAIN_HARD),
    (SUBSET_4_TRAIN_EASY, SUBSET_4_TRAIN_HARD),
    (TOTAL_TRAIN_EASY, TOTAL_TRAIN_HARD),
) = [
    helper_prepare_curriculum_subset_data(train_split, split_ratio=0.5)
    for train_split in (SUBSET_1_TRAIN, SUBSET_2_TRAIN, SUBSET_3_TRAIN, SUBSET_4_TRAIN, TOTAL_TRAIN)
]

In [None]:
# Global values used during training. PEFT_CONFIG (defined right after these)
# is the configuration of the LoRA layers trained in the Phase 1 and Phase 2 DPO runs.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The bf16 capability is only queried when CUDA is actually available.
IS_SUPPORT_BF16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
IS_SUPPORT_FP16 = torch.cuda.is_available() and not IS_SUPPORT_BF16

# Half precision is only selected on a GPU; without CUDA fall back to float32.
if IS_SUPPORT_BF16:
    DTYPE = torch.bfloat16
elif IS_SUPPORT_FP16:
    DTYPE = torch.float16
else:
    DTYPE = torch.float32


# LoRA configuration shared by all DPO runs: rank 16, alpha 32, dropout 0.05,
# no bias parameters, applied to the projection modules listed below.
PEFT_CONFIG = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
)

In [None]:
def helper_generate_dpo_training_args(dir, phase_select):
    """Build the ``DPOConfig`` for one training run.

    Only ``beta`` and ``learning_rate`` depend on the phase: phase 1 uses
    beta 0.1 and learning rate 5e-6, any other ``phase_select`` value uses
    beta 0.08 and learning rate 3e-6. Everything else is shared.

    Args:
        dir: Output (checkpoint) directory of the run.
        phase_select: Training phase; ``1`` selects the phase-1 values.

    Returns:
        The configured ``DPOConfig``.
    """
    is_first_phase = phase_select == 1
    phase_beta = 0.1 if is_first_phase else 0.08
    phase_learning_rate = 5e-6 if is_first_phase else 3e-6

    shared_settings = dict(
        # Optimisation schedule.
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        optim="paged_adamw_32bit",
        # Memory / precision.
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        bf16=IS_SUPPORT_BF16,
        fp16=IS_SUPPORT_FP16,
        # Sequence limits.
        remove_unused_columns=True,
        max_length=512,
        max_prompt_length=256,
        # Logging, checkpointing and evaluation cadence.
        report_to="tensorboard",
        logging_strategy="steps",
        logging_steps=10,
        save_strategy="steps",
        save_steps=30,
        eval_strategy="steps",
        eval_steps=30,
        per_device_eval_batch_size=2,
        eval_accumulation_steps=1,
        # Best checkpoint is the one with the largest evaluation reward margin.
        load_best_model_at_end=True,
        metric_for_best_model="eval_rewards/margins",
        greater_is_better=True,
    )

    return DPOConfig(
        output_dir=dir,
        beta=phase_beta,
        learning_rate=phase_learning_rate,
        **shared_settings,
    )

In [None]:
def helper_create_base_model(model_id = MODEL_ID):
    """Load ``model_id`` as a causal LM in ``DTYPE`` with ``use_cache=False`` and move it to ``DEVICE``."""
    loaded = AutoModelForCausalLM.from_pretrained(model_id, dtype=DTYPE, use_cache=False)
    return loaded.to(DEVICE)

In [None]:
def helper_generate_dpo_trainer(training_args, training_dataset, val_dataset, phase_select, model=None):
    """Create the ``DPOTrainer`` for a phase-1 or phase-2 run.

    Phase 1 starts from a freshly created base model and lets the trainer attach
    new LoRA layers through ``PEFT_CONFIG``. Phase 2 continues with the model
    passed in ``model`` and passes no ``peft_config``. No explicit reference
    model is given in either phase (``ref_model=None``).

    Args:
        training_args: ``DPOConfig`` of the run.
        training_dataset: Training split.
        val_dataset: Evaluation split.
        phase_select: ``1`` or ``2``.
        model: Must be ``None`` for phase 1 and the model to continue training
            for phase 2.

    Returns:
        The configured ``DPOTrainer``.

    Raises:
        ValueError: If ``phase_select`` and ``model`` do not form one of the two
            supported combinations.
    """
    if phase_select == 1 and model is None:
        base_model = helper_create_base_model()
    elif phase_select == 2 and model is not None:
        base_model = model
    else:
        # Fail loudly here instead of continuing with an undefined or None model.
        raise ValueError(
            "Fonksiyon kullanımında hata tespit edildi: "
            f"phase_select={phase_select!r}, model={'None' if model is None else type(model).__name__}"
        )

    base_model.enable_input_require_grads()
    base_model.config.use_cache = False

    return DPOTrainer(
        model=base_model,
        ref_model=None,
        args=training_args,
        train_dataset=training_dataset,
        processing_class=TOKENIZER,
        peft_config=PEFT_CONFIG if phase_select == 1 else None,
        eval_dataset=val_dataset,
    )

In [None]:
# Saves the LoRA adapter obtained from training.
def helper_save_adapter(trainer, dir):
  """Write the trainer's model to ``dir`` by calling ``trainer.save_model``."""
  trainer.save_model(dir)

In [None]:
def helper_load_model(dir):
    """Load the adapter stored in ``dir`` on top of a fresh base model, with the adapter trainable."""
    return PeftModel.from_pretrained(helper_create_base_model(), dir, is_trainable=True)

In [None]:
@torch.inference_mode()
def helper_generate_output_for_single_prompt(raw_prompt, model_and_tokenizer_path):
    """Generate one sampled response for a single user prompt.

    The model and tokenizer are loaded from ``model_and_tokenizer_path`` on every
    call and released again before returning.

    Args:
        raw_prompt: Text of the user message.
        model_and_tokenizer_path: Location of the model and tokenizer.

    Returns:
        The decoded continuation without the prompt tokens and without special tokens.
    """
    model = AutoModelForCausalLM.from_pretrained(model_and_tokenizer_path, dtype=DTYPE).to(DEVICE)
    tokenizer = AutoTokenizer.from_pretrained(model_and_tokenizer_path)
    model.eval()

    chat_text = tokenizer.apply_chat_template(
        [{"content": raw_prompt, "role": "user"}],
        tokenize=False,
        add_generation_prompt=True,
    )

    # The chat template output is already fully formatted, so no special tokens are added.
    encoded = tokenizer(
        chat_text,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False
    ).to(DEVICE)
    prompt_token_count = encoded["input_ids"].shape[-1]

    generated = model.generate(
        **encoded,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
    )

    # generate() returns prompt + continuation; keep only the continuation.
    decoded_response = tokenizer.decode(generated[0][prompt_token_count:], skip_special_tokens=True)

    del model
    del tokenizer
    torch.cuda.empty_cache()
    gc.collect()

    return decoded_response

In [None]:
@torch.inference_mode()
def helper_generate_output_for_many_prompts_with_batches(model_and_tokenizer_path, prompts, batch_size=8, device=DEVICE):
    """Generate one sampled response per prompt, processing the prompts in batches.

    Args:
        model_and_tokenizer_path: Location of the model and tokenizer.
        prompts: Sliceable sequence of user prompt texts.
        batch_size: Number of prompts generated together.
        device: Device the model and the inputs are moved to.

    Returns:
        A ``Dataset`` with the columns ``prompt`` and ``responses``.
    """
    model = AutoModelForCausalLM.from_pretrained(model_and_tokenizer_path, dtype=DTYPE).to(device)

    tokenizer = AutoTokenizer.from_pretrained(model_and_tokenizer_path)
    # Left padding keeps every prompt adjacent to its continuation within a batch.
    tokenizer.padding_side = "left"
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model.eval()

    collected_prompts, collected_responses = [], []

    print(f"{len(prompts)} tane prompt işleniyor.")

    for start in tqdm(range(0, len(prompts), batch_size)):
        chunk = prompts[start : start + batch_size]

        chat_texts = [
            tokenizer.apply_chat_template(
                [{"content": raw_prompt, "role": "user"}],
                tokenize=False,
                add_generation_prompt=True,
            )
            for raw_prompt in chunk
        ]

        encoded = tokenizer(
            chat_texts,
            return_tensors="pt",
            truncation=True,
            padding=True,
            add_special_tokens=False
        ).to(device)

        # All rows share this padded length, so one slice removes every prompt.
        padded_prompt_length = encoded["input_ids"].shape[-1]

        generated = model.generate(
            **encoded,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id
        )

        collected_responses.extend(
            tokenizer.batch_decode(generated[:, padded_prompt_length:], skip_special_tokens=True)
        )
        collected_prompts.extend(chunk)

    dataset = Dataset.from_dict({
        "prompt": collected_prompts,
        "responses": collected_responses
    })

    del model
    del tokenizer
    torch.cuda.empty_cache()
    gc.collect()

    return dataset

In [None]:
def helper_save_train_logs(log_history, dir):
    """Write ``log_history`` as indented JSON to the file path ``dir``.

    The parent folder is created first when it does not exist yet.
    """
    parent_folder = os.path.dirname(dir)
    needs_folder = bool(parent_folder) and not os.path.exists(parent_folder)

    if needs_folder:
        os.makedirs(parent_folder, exist_ok=True)
        print(f"Klasör oluşturuldu: {parent_folder}")

    with open(dir, "w") as handle:
        json.dump(log_history, handle, indent=4)

    print(f"Loglar {dir} konumuna kaydedildi\n")

In [None]:
def helper_load_train_logs(dir):
    """Read the JSON training log stored at the file path ``dir`` and return its content."""
    with open(dir, "r") as handle:
        loaded_log = json.load(handle)
    print(f"Loglar {dir} konumundan yüklendi\n")
    return loaded_log

In [None]:
def _split_dpo_log_history(log_history):
    """Separate trainer log entries into training rows and evaluation rows."""
    train_rows, eval_rows = [], []
    for entry in log_history:
        if "eval_loss" in entry:
            prefix, rows, kind = "eval_", eval_rows, "Eval"
        elif "loss" in entry:
            prefix, rows, kind = "", train_rows, "Train"
        else:
            # Entries without a loss value carry nothing that is plotted here.
            continue
        rows.append({
            "step": entry["step"],
            "loss": entry[f"{prefix}loss"],
            "accuracy": entry.get(f"{prefix}rewards/accuracies"),
            "margin": entry.get(f"{prefix}rewards/margins"),
            "chosen": entry.get(f"{prefix}rewards/chosen"),
            "rejected": entry.get(f"{prefix}rewards/rejected"),
            "type": kind,
        })
    return train_rows, eval_rows


def _has_metric(frame, column):
    """Return True when ``frame`` holds at least one non-missing value in ``column``."""
    return (not frame.empty) and column in frame.columns and bool(frame[column].notna().any())


def helper_plot_dpo_metrics(log_history, phase_select, trainer_select, dir):
    """Plot loss, accuracy, margin and rewards of one DPO run and save the figure.

    Training and evaluation curves are drawn together in a 2x2 grid.

    Args:
        log_history: List of trainer log entries (dicts with ``step`` and either
            ``loss`` or ``eval_loss``).
        phase_select: Phase number shown in the title.
        trainer_select: Trainer/model number shown in the title.
        dir: File path of the image to write; a falsy value skips saving.

    Returns:
        None. Prints a message and returns early when the log has no loss entries.
    """
    train_rows, eval_rows = _split_dpo_log_history(log_history)

    if not train_rows and not eval_rows:
        print(f"Phase {phase_select} için veri bulunamadı.")
        return

    df_train = pd.DataFrame(train_rows)
    df_eval = pd.DataFrame(eval_rows)

    sns.set_theme(style="darkgrid")
    fig, axes = plt.subplots(2, 2, figsize=(18, 12))
    fig.suptitle(f'DPO Eğitim Analizi (Train vs Eval) - Trainer {trainer_select} Phase {phase_select}', fontsize=16, fontweight='bold')

    # Top-left: loss.
    ax = axes[0, 0]
    if not df_train.empty:
        sns.lineplot(ax=ax, data=df_train, x="step", y="loss", label="Train Loss", color="tab:blue", linestyle="-")
    if not df_eval.empty:
        sns.lineplot(ax=ax, data=df_eval, x="step", y="loss", label="Eval Loss", color="tab:orange", linestyle="--", linewidth=2)
    ax.set_title("Training and Eval Loss")
    ax.set_ylabel("Loss")
    ax.legend()

    # Top-right: reward accuracy, with a dotted reference line at 0.5.
    ax = axes[0, 1]
    if _has_metric(df_train, "accuracy"):
        sns.lineplot(ax=ax, data=df_train, x="step", y="accuracy", label="Train Acc", color="green", linestyle="-", alpha=0.6, marker="s")
    if _has_metric(df_eval, "accuracy"):
        sns.lineplot(ax=ax, data=df_eval, x="step", y="accuracy", label="Eval Acc", color="darkgreen", linestyle="--", linewidth=2, marker="o")
    ax.set_title("Training and Eval Accuracies")
    ax.set_ylabel("Accuracy")
    ax.axhline(0.5, ls=':', color='gray', alpha=0.5)
    ax.legend()

    # Bottom-right: reward margin.
    ax = axes[1, 1]
    if _has_metric(df_train, "margin"):
        sns.lineplot(ax=ax, data=df_train, x="step", y="margin", label="Train Margin", color="purple", linestyle="-", alpha=0.6, marker="s")
    if _has_metric(df_eval, "margin"):
        sns.lineplot(ax=ax, data=df_eval, x="step", y="margin", label="Eval Margin", color="indigo", linestyle="--", linewidth=2, marker="o")
    ax.set_title("Train and Eval Margin")
    ax.set_ylabel("Margin Score")
    ax.legend()

    # Bottom-left: chosen and rejected rewards; each series is drawn only when
    # it has data of its own.
    ax = axes[1, 0]
    if _has_metric(df_train, "chosen"):
        sns.lineplot(ax=ax, data=df_train, x="step", y="chosen", color="blue", linestyle="-", alpha=0.4, label="Train Chosen")
    if _has_metric(df_train, "rejected"):
        sns.lineplot(ax=ax, data=df_train, x="step", y="rejected", color="red", linestyle="-", alpha=0.4, label="Train Rejected")
    if _has_metric(df_eval, "chosen"):
        sns.lineplot(ax=ax, data=df_eval, x="step", y="chosen", color="blue", linestyle="--", linewidth=2, label="Eval Chosen")
    if _has_metric(df_eval, "rejected"):
        sns.lineplot(ax=ax, data=df_eval, x="step", y="rejected", color="red", linestyle="--", linewidth=2, label="Eval Rejected")
    ax.set_title("Train and Eval Rewards")
    ax.set_ylabel("Rewards")
    ax.legend(fontsize='small')

    plt.tight_layout()

    # The path is only inspected when one was given, so a falsy `dir` means
    # "do not save" instead of failing inside os.path.dirname.
    if dir:
        folder_path = os.path.dirname(dir)
        if folder_path and not os.path.exists(folder_path):
            os.makedirs(folder_path, exist_ok=True)
            print(f"Klasör oluşturuldu: {folder_path}")

        fig.savefig(dir, dpi=300, bbox_inches='tight')
        print(f"Plot '{dir}' konumuna kaydedildi.")

    # Show the figure, then release it: this function is called in loops and
    # open figures would otherwise accumulate.
    plt.show()
    plt.close(fig)

In [None]:
# One DPOConfig per Phase 1 run (models 1-5), each with its own checkpoint directory.
all_training_args_phase_1 = []
for adapter_number in range(1, 6):
    checkpoint_dir = f"{CHECKPOINTS_PHASE_1_BASE_DIR}/dpo_adapter_{adapter_number}"
    all_training_args_phase_1.append(
        helper_generate_dpo_training_args(checkpoint_dir, phase_select=1)
    )

In [None]:
# Phase 1 runs as (model number, DPOConfig, training split, validation split).
# Models 1-4 use the easy half of one subset each; model 5 uses the easy half of
# the pooled training data.
phase_1_configs = [
    (model_number, all_training_args_phase_1[model_number - 1], easy_split, val_split)
    for model_number, (easy_split, val_split) in enumerate(
        [
            (SUBSET_1_TRAIN_EASY, SUBSET_1_VAL),
            (SUBSET_2_TRAIN_EASY, SUBSET_2_VAL),
            (SUBSET_3_TRAIN_EASY, SUBSET_3_VAL),
            (SUBSET_4_TRAIN_EASY, SUBSET_4_VAL),
            (TOTAL_TRAIN_EASY, TOTAL_VAL),
        ],
        start=1,
    )
]

In [None]:
# Phase 1 DPO training for the four per-subset models and for the model trained on
# the combined dataset. The adapter and the training log of every run are saved.
for model_id, args, train_ds, val_ds in phase_1_configs:

    print(f"\n{'='*10} Model {model_id} için Phase 1 Eğitimi Başlıyor {'='*10}")

    # No model is passed: for phase 1 the trainer helper creates a fresh base model.
    trainer = helper_generate_dpo_trainer(
        training_args=args,
        training_dataset=train_ds,
        val_dataset=val_ds,
        phase_select=1
    )

    trainer.train()

    # These adapter directories are the ones loaded again at the start of Phase 2.
    adapter_save_path = f"{MODELS_PHASE_1_SAVE_BASE_DIR}/dpo_adapter_{model_id}"
    helper_save_adapter(trainer, adapter_save_path)
    print(f"Adapter kaydedildi: {adapter_save_path}")

    # Copy the log history so it no longer refers to the trainer's own list.
    log_history = trainer.state.log_history[:]
    log_save_path = f"{LOGS_PHASE_1_BASE_DIR}/log_trainer{model_id}_phase_1.json"
    helper_save_train_logs(log_history, log_save_path)
    print(f"Loglar kaydedildi: {log_save_path}")

    print(f"Model {model_id} bellekten siliniyor.")

    # Drop the references held by this cell before the next run starts.
    del trainer
    del log_history

    torch.cuda.empty_cache()
    gc.collect()

    print(f"Model {model_id} tamamlandı.\n")

print("Tüm modeller başarıyla eğitildi ve adapterlar kaydedildi.")

In [None]:
# Plot and save the training curves of every model that finished Phase 1.
print("Grafik çizdirme başlıyor.")

for i in range(1, 6):
    # Same file naming as used when the Phase 1 logs were written.
    log_file_path = f"{LOGS_PHASE_1_BASE_DIR}/log_trainer{i}_phase_1.json"
    plot_save_path = f"{PLOTS_PHASE_1_BASE_DIR}/plot_dpo_metrics_trainer{i}_phase_1.png"


    current_log = helper_load_train_logs(log_file_path)

    helper_plot_dpo_metrics(
        current_log,
        phase_select=1,
        trainer_select=i,
        dir=plot_save_path
    )

    # NOTE(review): printed unconditionally, also when the plot helper returned
    # early because the log contained no loss entries.
    print(f"Grafik başarıyla kaydedildi: Trainer {i} - Phase 1")

print("Tüm grafik işlemleri tamamlandı.")

In [None]:
# One DPOConfig per Phase 2 run (models 1-5), each with its own checkpoint directory.
all_training_args_phase_2 = []
for adapter_number in range(1, 6):
    checkpoint_dir = f"{CHECKPOINTS_PHASE_2_BASE_DIR}/dpo_adapter_{adapter_number}"
    all_training_args_phase_2.append(
        helper_generate_dpo_training_args(checkpoint_dir, phase_select=2)
    )

In [None]:
# NOTE(review): this list is not referenced by any other cell visible here —
# presumably a leftover; confirm before relying on or removing it.
model_paths = ["/dpo_adapter_1", "/dpo_adapter_2" ,"/dpo_adapter_3" ,"/dpo_adapter_4"]

In [None]:
# Phase 2 runs as (model number, DPOConfig, training split, validation split).
# Models 1-4 use the hard half of one subset each; model 5 uses the hard half of
# the pooled training data.
phase_2_configs = [
    (model_number, all_training_args_phase_2[model_number - 1], hard_split, val_split)
    for model_number, (hard_split, val_split) in enumerate(
        [
            (SUBSET_1_TRAIN_HARD, SUBSET_1_VAL),
            (SUBSET_2_TRAIN_HARD, SUBSET_2_VAL),
            (SUBSET_3_TRAIN_HARD, SUBSET_3_VAL),
            (SUBSET_4_TRAIN_HARD, SUBSET_4_VAL),
            (TOTAL_TRAIN_HARD, TOTAL_VAL),
        ],
        start=1,
    )
]

In [None]:
# Phase 2 DPO training for the four per-subset models and for the model trained on
# the combined dataset. The adapter and the training log of every run are saved.
for model_id, args, train_ds, val_ds in phase_2_configs:

    print(f"\n{'='*10} Model {model_id} için Phase 2 Eğitimi Başlıyor {'='*10}")

    # Continue from the adapter that Phase 1 saved for the same model number.
    load_path = f"{MODELS_PHASE_1_SAVE_BASE_DIR}/dpo_adapter_{model_id}"
    print(f"Phase 1 Modeli yükleniyor: {load_path}")

    current_model = helper_load_model(load_path)

    trainer = helper_generate_dpo_trainer(
        model=current_model,
        training_args=args,
        training_dataset=train_ds,
        val_dataset=val_ds,
        phase_select=2
    )

    trainer.train()

    adapter_save_path = f"{MODELS_PHASE_2_SAVE_BASE_DIR}/dpo_adapter_{model_id}"
    helper_save_adapter(trainer, adapter_save_path)
    print(f"Adapter kaydedildi: {adapter_save_path}")

    # Copy the log history so it no longer refers to the trainer's own list.
    log_history = trainer.state.log_history[:]
    log_save_path = f"{LOGS_PHASE_2_BASE_DIR}/log_trainer{model_id}_phase_2.json"
    helper_save_train_logs(log_history, log_save_path)
    print(f"Loglar kaydedildi: {log_save_path}")

    print(f"Model {model_id} bellekten temizleniyor.")

    # Drop the references held by this cell before the next run starts.
    del trainer
    del current_model
    del log_history

    torch.cuda.empty_cache()
    gc.collect()

    print(f"Model {model_id} tamamlandı.\n")

print("Tüm modeller başarıyla eğitildi ve adapterlar kaydedildi.")

In [None]:
# Plot and save the training curves of every model that finished Phase 2.
print("Grafik çizdirme başlıyor.")

for i in range(1, 6):
    # Same file naming as used when the Phase 2 logs were written.
    log_file_path = f"{LOGS_PHASE_2_BASE_DIR}/log_trainer{i}_phase_2.json"
    plot_save_path = f"{PLOTS_PHASE_2_BASE_DIR}/plot_dpo_metrics_trainer{i}_phase_2.png"


    current_log = helper_load_train_logs(log_file_path)

    helper_plot_dpo_metrics(
        current_log,
        phase_select=2,
        trainer_select=i,
        dir=plot_save_path
    )

    # NOTE(review): printed unconditionally, also when the plot helper returned
    # early because the log contained no loss entries.
    print(f"Grafik başarıyla kaydedildi: Trainer {i} Phase 2")

print("Tüm grafik işlemleri tamamlandı.")

In [None]:
def fuse_adapters_into_models(adapter_dirs, save_base_dir, phase_select, model_id=MODEL_ID):
    """Merge each LoRA adapter into the base model and save the full models.

    Every adapter in ``adapter_dirs`` is loaded, merged with
    ``merge_and_unload`` and written, together with the tokenizer of
    ``model_id``, to ``<save_base_dir>/dpo_full_model_<n>_phase_<phase_select>``
    where ``n`` is the 1-based position of the adapter.

    Returns:
        List of the directories the merged models were saved to, in input order.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    if not os.path.exists(save_base_dir):
        os.makedirs(save_base_dir, exist_ok=True)

    fused_model_paths = []
    for model_number, adapter_path in enumerate(adapter_dirs, start=1):
        save_path = os.path.join(save_base_dir, f"dpo_full_model_{model_number}_phase_{phase_select}")

        print(f"Adapter, Model ile Birleştiriliyor {model_number}: {adapter_path}")

        model = helper_load_model(adapter_path).merge_and_unload()

        model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)
        fused_model_paths.append(save_path)

        # Release the merged model before the next adapter is loaded.
        del model
        torch.cuda.empty_cache()
        gc.collect()
        print(f"Model Kaydedildi: {save_path}\n")

    return fused_model_paths

In [None]:
def create_merge_config(log_paths, fused_model_paths, output_yaml="config.yaml", base_model=MODEL_ID):
    """Write the MergeKit YAML config that merges the fused models.

    Each model is weighted by the square of its best evaluation accuracy found in
    its Phase 2 training log, normalised so the weights sum to 1. A log without
    evaluation accuracies counts as 0.5.

    Args:
        log_paths: Training-log files, one per model, in the same order as
            ``fused_model_paths``.
        fused_model_paths: Directories of the full (adapter-merged) models.
        output_yaml: File path the config is written to.
        base_model: Value written as ``base_model`` of the merge.

    Raises:
        ValueError: If ``log_paths`` and ``fused_model_paths`` differ in length.
    """
    log_paths = list(log_paths)
    fused_model_paths = list(fused_model_paths)
    # zip() below would silently drop models if the two lists were not aligned.
    if len(log_paths) != len(fused_model_paths):
        raise ValueError(
            f"log_paths ({len(log_paths)}) and fused_model_paths "
            f"({len(fused_model_paths)}) must have the same length"
        )

    if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
        dtype_str = "bfloat16"
        print("Donanım bf16 destekliyor. Config 'bfloat16' olarak ayarlanıyor.")
    else:
        dtype_str = "float16"
        print("Donanım bf16 desteklemiyor. Config 'float16' olarak ayarlanıyor.")

    peak_accuracies = []

    for i, log_path in enumerate(log_paths):
        log_history = helper_load_train_logs(log_path)

        eval_accs = [
            entry.get('eval_rewards/accuracies', entry.get('eval_accuracy', 0.5))
            for entry in log_history
            if 'eval_rewards/accuracies' in entry or 'eval_accuracy' in entry
        ]

        peak_acc = max(eval_accs) if eval_accs else 0.5
        peak_accuracies.append(peak_acc)
        print(f"Model {i+1} Peak Accuracy: {peak_acc:.4f}")

    squared_accs = np.array(peak_accuracies, dtype=float) ** 2
    total = squared_accs.sum()
    if total > 0:
        weights = squared_accs / total
    elif len(squared_accs):
        # All accuracies are zero: fall back to equal weights instead of NaN.
        weights = np.full(len(squared_accs), 1.0 / len(squared_accs))
    else:
        weights = squared_accs

    yaml_lines = [
        "merge_method: dare_ties",
        f"base_model: {base_model}",
        "tokenizer_source: base",
        "parameters:",
        "  int8_mask: true",
        "  normalize: true",
        f"dtype: {dtype_str}",
        "models:"]

    for path, w in zip(fused_model_paths, weights):
        yaml_lines.append(f"  - model: {path}")
        yaml_lines.append(f"    parameters:")
        yaml_lines.append(f"      weight: {w:.4f}")
        yaml_lines.append(f"      density: 0.7")

    final_yaml = "\n".join(yaml_lines)

    # Make sure the target folder exists before writing.
    output_folder = os.path.dirname(output_yaml)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    with open(output_yaml, "w", encoding="utf-8") as f:
        f.write(final_yaml)

    print(f"\n Merge config kaydedildi: {os.path.abspath(output_yaml)}")
    for i, w in enumerate(weights): print(f" - Model {i+1}: {w:.4f}")

In [None]:
# Adapter directories written by the Phase 1 training loop (models 1-5).
PHASE_1_ADAPTER_DIRS = [
    f"{MODELS_PHASE_1_SAVE_BASE_DIR}/dpo_adapter_{i}"
    for i in range(1, 6)
]

# Adapter directories written by the Phase 2 training loop (models 1-5).
PHASE_2_ADAPTER_DIRS = [
    f"{MODELS_PHASE_2_SAVE_BASE_DIR}/dpo_adapter_{i}"
    for i in range(1, 6)
]

# Phase 2 training logs, in the same model order.
PHASE_2_LOG_DIRS = [
    f"{LOGS_PHASE_2_BASE_DIR}/log_trainer{i}_phase_2.json"
    for i in range(1, 6)
]


# Directory that receives the full (adapter-merged) models of both phases.
FUSED_MODELS_SAVE_BASE_DIR = "/content/drive/MyDrive/KO_Project_2/full_models"

# File path of the MergeKit config (despite the name, this is a file, not a directory).
CONFIG_YAML_DIR = "/content/drive/MyDrive/KO_Project_2/config.yaml"

FUSED_MODELS_PATH_PHASE_1 = fuse_adapters_into_models(PHASE_1_ADAPTER_DIRS, FUSED_MODELS_SAVE_BASE_DIR, phase_select=1)
FUSED_MODELS_PATH_PHASE_2 = fuse_adapters_into_models(PHASE_2_ADAPTER_DIRS, FUSED_MODELS_SAVE_BASE_DIR, phase_select=2)


# Only the first four models (the per-subset ones) are merged; model 5, trained on
# the combined data, is left out of the merge.
create_merge_config(PHASE_2_LOG_DIRS[:4], FUSED_MODELS_PATH_PHASE_2[:4], output_yaml=CONFIG_YAML_DIR)

In [None]:
# Run the merge described by the generated config.
# NOTE(review): the config path repeats the value of CONFIG_YAML_DIR and the
# output path repeats the value later assigned to MERGED_MODEL_FINAL_PATH; the
# three must be kept in sync by hand.
!mergekit-yaml /content/drive/MyDrive/KO_Project_2/config.yaml /content/drive/MyDrive/KO_Project_2/merged_model_final \
    --allow-crimes \
    --copy-tokenizer \
    --out-shard-size 1B

In [None]:
# Output directory of the mergekit run above; used below to load the merged model.
MERGED_MODEL_FINAL_PATH = "/content/drive/MyDrive/KO_Project_2/merged_model_final"

In [None]:
def evaluate_model_on_DPO_metrics(merged_model, eval_dataset, base_model_id=MODEL_ID, beta=0.08):
    """Evaluate a model on DPO metrics against a freshly loaded reference model.

    The reference model is loaded from ``base_model_id`` with the dtype of
    ``merged_model``. A ``DPOTrainer`` is built with ``eval_dataset`` as both
    training and evaluation data and only ``trainer.evaluate()`` is run.

    Args:
        merged_model: Model to evaluate.
        eval_dataset: Preference dataset to evaluate on.
        base_model_id: Identifier or path of the reference model.
        beta: DPO beta used for the evaluation config.

    Returns:
        Dict with the keys ``Accuracy``, ``Margin`` and ``Loss``; a metric that
        the trainer did not report is returned as 0.0.
    """
    print(f"Referans Model Yükleniyor ({base_model_id}).")

    # `dtype=` is the keyword the other model-loading helpers in this notebook use.
    ref_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        dtype=merged_model.dtype,
        device_map=DEVICE
    )
    trainer = None
    try:
        ref_model.eval()

        eval_args = DPOConfig(
            output_dir="eval_temp_dir",
            per_device_eval_batch_size=2,
            remove_unused_columns=True,
            beta=beta,
            label_names=[],
            report_to="none",
        )

        trainer = DPOTrainer(
            model=merged_model,
            ref_model=ref_model,
            args=eval_args,
            train_dataset=eval_dataset,
            eval_dataset=eval_dataset,
            processing_class=TOKENIZER,
        )

        print("DPO Metrikleri üzerinden Test Başlıyor.")

        metrics = trainer.evaluate()
    finally:
        # Release the reference model and trainer even when evaluation fails.
        # Collect garbage first so the cache flush can return the freed memory.
        del ref_model
        del trainer
        gc.collect()
        torch.cuda.empty_cache()

    clean_metrics = {
        "Accuracy": metrics.get("eval_rewards/accuracies", metrics.get("eval_accuracy", 0.0)),
        "Margin": metrics.get("eval_rewards/margins", metrics.get("eval_margins", 0.0)),
        "Loss": metrics.get("eval_loss", 0.0)
    }

    print("Test Tamamlandı.")

    return clean_metrics

In [None]:
def plot_model_comparisons(results_dict, save_path="model_comparison.png", dpi=300):
    """Draw a grouped bar chart of Accuracy, Margin and Loss per model and save it.

    Args:
        results_dict: Mapping of model name to a dict with the keys
            ``Accuracy``, ``Margin`` and ``Loss`` (missing keys count as 0.0).
        save_path: File path of the image to write; its folder is created if needed.
        dpi: Resolution of the saved image.
    """
    model_names = list(results_dict.keys())
    metrics = ["Accuracy", "Margin", "Loss"]
    colors = ['#2ecc71', '#3498db', '#e74c3c']

    data = {
        metric: [results_dict[model].get(metric, 0.0) for model in model_names]
        for metric in metrics
    }

    x = np.arange(len(model_names))
    width = 0.25

    fig, ax = plt.subplots(figsize=(12, 6), layout='constrained')

    # One bar group per model; each metric is shifted by one bar width.
    for position, (attribute, color) in enumerate(zip(metrics, colors)):
        rects = ax.bar(x + width * position, data[attribute], width, label=attribute, color=color, alpha=0.9, edgecolor='black', linewidth=0.5)
        ax.bar_label(rects, padding=3, fmt='%.2f', fontsize=9, rotation=0)

    ax.set_ylabel('Değer')
    ax.set_title('Model Performans Karşılaştırması (DPO Test Seti)', fontsize=14, pad=15, fontweight='bold')
    ax.set_xticks(x + width, model_names)

    # Margin and Loss are not bounded by 1 (and Margin can be negative), so the
    # axis grows with the data. Values inside [0, 1] keep the 0-1.1 range.
    finite_values = [float(v) for series in data.values() for v in series if np.isfinite(v)]
    lower = min(0.0, 1.1 * min(finite_values, default=0.0))
    upper = max(1.1, 1.1 * max(finite_values, default=0.0))
    ax.set_ylim(lower, upper)

    ax.legend(loc='upper left', ncols=3)
    ax.grid(axis='y', linestyle='--', alpha=0.3)

    save_dir = os.path.dirname(save_path)
    if save_dir and not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)

    # Save and close this specific figure rather than whichever one is current.
    fig.savefig(save_path, dpi=dpi, bbox_inches='tight')
    plt.close(fig)

    print(f"Grafik başarıyla kaydedildi: {save_path}")

In [None]:
# (display name, model directory) pairs to benchmark: the single model trained on
# the combined data (fused model 5 of Phase 2) and the mergekit output.
MODELS_TO_TEST = [
    ("Tek Model", f"{FUSED_MODELS_SAVE_BASE_DIR}/dpo_full_model_5_phase_2"),
    ("Merged Model", MERGED_MODEL_FINAL_PATH),
]

In [None]:
# Benchmark the single model (trained on the combined data through Phase 1 and
# Phase 2) against the merged model on the pooled test set.
all_metrics_storage = {}

for name, path in MODELS_TO_TEST:
    print(f"\n{'='*10} Test ediliyor: {name} {'='*10}")
    print(f"Yükleniyor: {path}")

    # Reuse the notebook-wide DTYPE / DEVICE / MODEL_ID instead of repeating their
    # values. The models are this notebook's own saved checkpoints, so
    # trust_remote_code is not requested.
    model = AutoModelForCausalLM.from_pretrained(
        path,
        dtype=DTYPE,
        device_map=DEVICE,
    )

    try:
        # NOTE(review): beta=0.1 here differs from the 0.08 used for Phase 2
        # training and from the function default — confirm which is intended.
        metrics = evaluate_model_on_DPO_metrics(
            merged_model=model,
            eval_dataset=TOTAL_TEST,
            base_model_id=MODEL_ID,
            beta=0.1
        )
    finally:
        # Free the evaluated model even when the evaluation fails.
        del model
        gc.collect()
        torch.cuda.empty_cache()

    print(f"Sonuçlar: {metrics}")

    all_metrics_storage[name] = metrics

print("Test bitti, grafik çiziliyor.")
plot_model_comparisons(
    results_dict=all_metrics_storage,
    save_path="/content/drive/MyDrive/KO_Project_2/plots/final_benchmark.png"
)

In [None]:
# Generate responses with the merged model for a sample of test prompts to
# inspect its output quality.
SAMPLE_SIZE = 50
print(f"Toplam Test Verisi: {len(TOTAL_TEST)}")

# Never request more rows than the test set holds.
test_sample = TOTAL_TEST.shuffle(seed=42).select(range(min(SAMPLE_SIZE, len(TOTAL_TEST))))

# Materialise the column as a plain list of strings for batching.
input_prompts = list(test_sample["prompt"])

print(f"{len(input_prompts)} adet prompt işleniyor.")

generated_dataset = helper_generate_output_for_many_prompts_with_batches(
    model_and_tokenizer_path=MERGED_MODEL_FINAL_PATH,
    prompts=input_prompts,
    batch_size=8,
    device=DEVICE
)

# rename() returns a new frame, so re-running the cell starts from the raw
# generation output each time.
df_results = generated_dataset.to_pandas().rename(
    columns={"prompt": "Input_Prompt", "responses": "Merged_Model_Response"}
)

save_path_csv = "/content/drive/MyDrive/KO_Project_2/merged_model_test_outputs.csv"
df_results.to_csv(save_path_csv, index=False)

print(f"\n İşleme tamamlandı. Sonuçlar kaydedildi: {save_path_csv}")

pd.set_option('display.max_colwidth', 150)
print(df_results.head(3))