In [1]:
!python --version

Python 3.12.7


In [35]:
import torch
import gc
import sys

def clear_all_memory():
    """Attempts to clear all memory (CPU and GPU)."""
    # Clear PyTorch CUDA cache
    torch.cuda.empty_cache()

    # Run garbage collection
    gc.collect()

    # Clear variables from global namespace (use with caution!)
    for name in dir():
        if not name.startswith('_'):
            del globals()[name]

    # Clear modules from sys.modules (use with extreme caution!)
    for name in list(sys.modules.keys()):
        if name not in ['sys', 'gc', 'torch']:  # Preserve essential modules
            del sys.modules[name]

    # Run garbage collection again
    gc.collect()

# Call the function to attempt to clear memory
clear_all_memory()

In [2]:
!pip install transformers"<=4.45"
!pip install accelerate
!pip install tokenizers
!pip install trl

Collecting transformers<=4.45
  Using cached transformers-4.45.0-py3-none-any.whl.metadata (44 kB)
Using cached transformers-4.45.0-py3-none-any.whl (9.9 MB)
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.46.3
    Uninstalling transformers-4.46.3:
      Successfully uninstalled transformers-4.46.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
trl 0.12.1 requires transformers>=4.46.0, but you have transformers 4.45.0 which is incompatible.[0m[31m
[0mSuccessfully installed transformers-4.45.0
Collecting transformers>=4.46.0 (from trl)
  Using cached transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
Using cached transformers-4.46.3-py3-none-any.whl (10.0 MB)
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existi

In [3]:
!pip install datasets
!pip install peft



In [4]:
from datasets import load_dataset
from trl import DPOTrainer, DPOConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig

  from .autonotebook import tqdm as notebook_tqdm


In [46]:
import torch

model_id = "meta-llama/Llama-3.2-1B-Instruct"

adapter_dir = './models/sft'
#output_dir = './models/merged_peft'

dpo_model = AutoModelForCausalLM.from_pretrained(adapter_dir, device_map="cuda", torch_dtype=torch.bfloat16)
#dpo_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", torch_dtype=torch.bfloat16)
#dpo_model = dpo_model.merge_and_unload()

#dpo_model = AutoModelForCausalLM.from_pretrained(model_id)
dpo_tokenizer = AutoTokenizer.from_pretrained(model_id)
dpo_tokenizer.pad_token = dpo_tokenizer.eos_token

In [23]:
preference_data_file_name = "./parsed_preferences.json"
preference_dataset = load_dataset("json", data_files=preference_data_file_name,split="train")

In [24]:
preference_dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 929
})

In [25]:
!pip install peft



In [41]:
dpo_config = DPOConfig(
    output_dir="./models/dpo_new",
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    fp16=True,
    logging_steps=10
)

from peft import LoraConfig



dpo_peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules="all-linear",
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

#dpo_peft_config.inference_mode = False


In [47]:
dpo_trainer = DPOTrainer(
    model=dpo_model,
    args=dpo_config,
    train_dataset=preference_dataset,
    tokenizer=dpo_tokenizer,
    peft_config=dpo_peft_config
)

In [48]:
dpo_trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,0.6935
20,0.6934
30,0.6921
40,0.6933
50,0.6927
60,0.6926
70,0.6934
80,0.6925
90,0.6939
100,0.6931




TrainOutput(global_step=1858, training_loss=0.671996533678217, metrics={'train_runtime': 4521.4219, 'train_samples_per_second': 0.411, 'train_steps_per_second': 0.411, 'total_flos': 0.0, 'train_loss': 0.671996533678217, 'epoch': 2.0})

In [50]:
output_dir = './models/dpo_peft'
dpo_trainer.save_model(output_dir)