In [1]:
!pip install -r requirements.txt

Collecting accelerate==1.2.1 (from -r requirements.txt (line 1))
  Downloading accelerate-1.2.1-py3-none-any.whl.metadata (19 kB)
Collecting aiohappyeyeballs==2.4.4 (from -r requirements.txt (line 2))
  Downloading aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)
Collecting aiohttp==3.11.11 (from -r requirements.txt (line 3))
  Downloading aiohttp-3.11.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiosignal==1.3.2 (from -r requirements.txt (line 4))
  Downloading aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting async-timeout==5.0.1 (from -r requirements.txt (line 11))
  Downloading async_timeout-5.0.1-py3-none-any.whl.metadata (5.1 kB)
Collecting datasets==3.2.0 (from -r requirements.txt (line 22))
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill==0.3.8 (from -r requirements.txt (line 27))
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting einops==0.8.0 (from -r requi

In [2]:
!pip install datasets
!pip install transformers
!pip install --upgrade transformers
!pip install einops
!pip install trl
!pip install huggingface_hub
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
!pip install --no-deps --upgrade "flash-attn>=2.6.3"

Collecting datasets
  Using cached datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Using cached pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Using cached pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting requests>=2.32.2 (from datasets)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm>=4.66.3 (from datasets)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting xxhash (from datasets)
  Using cached xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Using cached multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting aiohttp (from datasets)
  Using cached aiohttp-3.11.11-cp310-c

In [3]:
import os

# Unsloth patches transformers/trl at import time, so it is imported before them.
from unsloth import FastLanguageModel

import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.trainer_utils import get_last_checkpoint
from trl import DPOConfig, DPOTrainer
from huggingface_hub import notebook_login

# Authenticate against the Hugging Face Hub.
notebook_login()

# Optional root directory (e.g. a mounted drive) under which a second copy of the
# trained model is written. Leave as None to skip the extra copy.
base_dir = None

# Clearing GPU memory cache
torch.cuda.empty_cache()

# Set model device type to 'cuda'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Loading in an open source model with internal logit access
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", torch_dtype="auto", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", trust_remote_code=True).to(device)

# Using unsloth fast library: load the base model, then wrap it with PEFT adapters.
model, tokenizer = FastLanguageModel.from_pretrained("unsloth/gemma-2-9b", dtype=None)
model = FastLanguageModel.get_peft_model(model)

tokenizer.pad_token = tokenizer.eos_token

# Dataset format for DPO (illustrative example only; training uses `train_dataset` below)
sample_dataset = {"prompt": ["What is your confidence that Paris is the capital of France? Respond with a percentage"], "chosen": [" 90%."], "rejected": [" 50%."]}

train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
# train_dataset.set_format("torch", device="cpu")

training_args = DPOConfig(output_dir="Gemma-2_9B-DPO",
                          logging_steps=10,
                          bf16=True,
                          per_device_train_batch_size=4,
                          gradient_accumulation_steps=16)

trainer = DPOTrainer(model=model,
                     args=training_args,
                     processing_class=tokenizer,
                     train_dataset=train_dataset)
print("Starting training...")

# `resume_from_checkpoint=True` raises if the output directory holds no checkpoint,
# which is always the case on a first run. Resume only when one actually exists;
# passing None makes the trainer start from scratch.
checkpoint_root = training_args.output_dir
last_checkpoint = get_last_checkpoint(checkpoint_root) if os.path.isdir(checkpoint_root) else None
if last_checkpoint is not None:
    print(f"Resuming from checkpoint {last_checkpoint}")
trainer.train(resume_from_checkpoint=last_checkpoint)

# Save trained model
save_dir = "Gemma-2_9B_DPOtrained"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print(f"Trained model has been saved to {save_dir}")

# Mirror the saved model under `base_dir` when one is configured.
if base_dir is not None:
    mirror_dir = os.path.join(base_dir, save_dir)
    model.save_pretrained(mirror_dir)
    tokenizer.save_pretrained(mirror_dir)
    print(f"Trained model has been saved to {mirror_dir}")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Using device: cuda:0
Unsloth: If you want to finetune Gemma 2, install flash-attn to make it faster!
To install flash-attn, do the below:

pip install --no-deps --upgrade "flash-attn>=2.6.3"
==((====))==  Unsloth 2025.1.5: Fast Gemma2 patching. Transformers: 4.47.1.
   \\   /|    GPU: NVIDIA H100 PCIe. Max memory: 79.097 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/6.13G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
from datasets import load_dataset

# Allocate new tensors on the GPU by default, so the model loaded below is
# created on 'cuda'.
torch.set_default_device("cuda")

# Directory containing the DPO-trained checkpoint to evaluate.
# NOTE(review): this differs from the directory the training cell writes
# ("Gemma-2_9B_DPOtrained") - confirm this is the checkpoint you mean to load.
trained_model_dir = "./Qwen2_dpo_trained"

# Initialize model and corresponding tokenizer.
# `torch_dtype` is a model-loading option and has no effect on a tokenizer, so it is
# passed to the model: "auto" loads the weights in the dtype stored in the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(trained_model_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(trained_model_dir, torch_dtype="auto", trust_remote_code=True)


In [None]:
prompt = "how can i develop a habit of drawing daily"

# Tokenize the prompt and move it to whichever device the model lives on,
# instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Greedy decoding (do_sample=False) with the sampling knobs explicitly unset.
# `return_dict_in_generate` + `output_logits` make generate() return the per-step
# logits alongside the generated token ids.
generation = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=False,
    temperature=None,
    top_p=None,
    return_dict_in_generate=True,
    output_logits=True,
)

# Generated token ids (prompt followed by the continuation).
outputs = generation.sequences

# `generation.logits` holds one (batch, vocab) tensor per generated step;
# stack them into a single (batch, new_tokens, vocab) tensor.
logits = torch.stack(generation.logits, dim=1)

# Log-probabilities over the vocabulary at every generated position.
log_probs = torch.nn.functional.log_softmax(logits, dim=-1)

output_answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print(output_answer)