In [None]:
!pip install -r requirements.txt

In [None]:
!pip install datasets
!pip install transformers
!pip install --upgrade transformers
!pip install einops
!pip install trl
!pip install huggingface_hub
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
import os

# Unsloth asks to be imported ahead of transformers/trl so that its patches are
# in place before those libraries are loaded.
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported

import torch
from datasets import load_dataset, Dataset
from huggingface_hub import notebook_login
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.trainer_utils import get_last_checkpoint
from trl import DPOConfig, DPOTrainer
# Prompt for Hugging Face Hub credentials inside the notebook.
notebook_login()

# Hand any cached, currently unused CUDA memory back to the driver before the
# model is loaded.
torch.cuda.empty_cache()

# Pick the first CUDA device when one is visible, otherwise fall back to CPU.
# `device` is only reported here; nothing later in this cell moves tensors to it.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load an open-weights model so that its internal logits are accessible.
# The model and tokenizer come from Unsloth's FastLanguageModel loader; a plain
# transformers load of "meta-llama/Llama-3.2-3B-Instruct" through
# AutoTokenizer / AutoModelForCausalLM was the earlier, now disabled, alternative.
base_model_id = "unsloth/Llama-3.2-3B-Instruct"

# No explicit dtype is requested (dtype=None); the loader returns the model and
# its tokenizer as a pair.
model, tokenizer = FastLanguageModel.from_pretrained(base_model_id, dtype=None)

# Wrap the base model for parameter-efficient fine-tuning. Only the model is
# passed, so every adapter setting comes from the library defaults.
model = FastLanguageModel.get_peft_model(model)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Illustration of the record layout used for DPO: a prompt together with a
# preferred ("chosen") and a dispreferred ("rejected") completion. This dict is
# an example only; it is not passed to the trainer in this cell.
sample_dataset = {
    "prompt": ["What is your confidence that Paris is the capital of France? Respond with a percentage"],
    "chosen": [" 90%."],
    "rejected": [" 50%."],
}

# Preference data actually used for training: the "train" split of the
# binarized UltraFeedback dataset.
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
# train_dataset.set_format("torch", device="cpu")

# Directory where the trainer writes its logs and intermediate checkpoints.
output_dir = "Llama3.2_3B-DPO"

# Choose the mixed-precision mode from what the GPU supports instead of
# hard-coding bf16: the model above was loaded without an explicit dtype, so the
# training precision has to follow the hardware as well.
use_bf16 = is_bfloat16_supported()

# 4 sequences per device x 16 accumulation steps = 64 sequences per optimizer
# step on each device; metrics are logged every 10 steps.
training_args = DPOConfig(output_dir=output_dir,
                          logging_steps=10,
                          bf16=use_bf16,
                          fp16=not use_bf16,
                          per_device_train_batch_size=4,
                          gradient_accumulation_steps=16)

trainer = DPOTrainer(model=model,
                     args=training_args,
                     processing_class=tokenizer,
                     train_dataset=train_dataset)

# Resume only when a checkpoint is really there. Passing
# `resume_from_checkpoint=True` unconditionally makes `train()` raise on a first
# run, because no checkpoint exists in `output_dir` yet. `get_last_checkpoint`
# lists the directory, hence the `isdir` guard.
last_checkpoint = get_last_checkpoint(output_dir) if os.path.isdir(output_dir) else None

print("Starting training...")
if last_checkpoint is not None:
    print(f"Resuming from checkpoint: {last_checkpoint}")

# `None` starts from scratch; a path continues from that checkpoint.
trainer.train(resume_from_checkpoint=last_checkpoint)

# Persist the training result. `model` is the object returned by
# FastLanguageModel.get_peft_model earlier in this cell, so presumably only the
# adapter weights are written here rather than a merged full model - TODO confirm.
save_dir = "Llama3.2_3B_DPOtrained"

# The model is written first, then the tokenizer, both into the same directory
# so they can be reloaded together from one path.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

print(f"Trained model has been saved to {save_dir}")

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
from datasets import load_dataset

# Create all new tensors, including the model weights loaded below, on the GPU.
torch.set_default_device("cuda")

# Directory written by the training cell.
checkpoint_dir = "./Llama3.2_3B_DPOtrained"

# Reload the tokenizer and the model from the saved checkpoint.
# `torch_dtype` controls the dtype of model weights and has no meaning for a
# tokenizer, so it is passed to the model load (where "auto" selects the dtype
# recorded with the checkpoint) instead of to the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(checkpoint_dir, torch_dtype="auto", trust_remote_code=True)


In [None]:
# Prompt to complete. It is tokenized as raw text, without a chat template.
prompt = "how can i develop a habit of drawing daily"

# Tokenize and move the tensors to whichever device the model lives on, rather
# than assuming a fixed device name.
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(model.device)

# Greedy decoding of up to 50 new tokens. `temperature` and `top_p` are cleared
# explicitly because they only apply when sampling. The dict-style return with
# `output_scores=True` additionally exposes the per-step prediction scores.
generation = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=False,
    temperature=None,
    top_p=None,
    return_dict_in_generate=True,
    output_scores=True,
)

# Token ids of the prompt followed by the generated continuation.
outputs = generation.sequences

# `generation.scores` holds one (batch, vocab) tensor per generated token;
# stacking along dim 1 gives (batch, new_tokens, vocab). Previously `logits` was
# just an alias of the token ids, which are not logits.
logits = torch.stack(generation.scores, dim=1)

# Log-probabilities over the vocabulary at every generated position.
log_probs = torch.nn.functional.log_softmax(logits.float(), dim=-1)

# Decode the first (and only) sequence in the batch back to text.
output_answer = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print(output_answer)