In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Setup

In [2]:
!pip install unsloth
!pip install transformers peft accelerate safetensors

from unsloth import FastLanguageModel, FastModel
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

Collecting unsloth
  Downloading unsloth-2025.4.7-py3-none-any.whl.metadata (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.4.4 (from unsloth)
  Downloading unsloth_zoo-2025.4.4-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.19-py3-none-any.whl.metadata (9.9 kB)
Collecting datasets>=2.16.0 (from unsloth)
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from unsloth)
  Downloadin

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
True
Tesla T4


# Define Model

unsloth model straight from checkpoint

In [None]:

model_a, tokenizer_a = FastLanguageModel.from_pretrained(
    model_name = '/content/drive/MyDrive/DPO/DPO on Colab/lora_adapter',
    max_seq_length = 2048,
)

FastLanguageModel.for_training(model_a)

AttributeError: 'NoneType' object has no attribute 'lower'

In [None]:
model_test, tokenizer_test = FastLanguageModel.from_pretrained()

==((====))==  Unsloth 2025.4.5: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.10G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

unsloth from gemma baseline

In [11]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",  # base model
    max_seq_length = 2048,
    load_in_4bit = True,  # or False if you have enough VRAM
    use_exact_model_name = True
)

model.load_adapter('/content/drive/MyDrive/DPO on Colab/lora_adapter')

#FastLanguageModel.for_training(model_b)

==((====))==  Unsloth 2025.4.7: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


standard transformers library model

In [None]:
model_name = '/content/drive/MyDrive/DPO/DPO on Colab/lora_adapter'  # Replace with 3B if available locally
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Parameter Count

In [None]:
# for param in model.parameters():
#     if param.dtype.is_floating_point or param.is_complex():
#         param.requires_grad = True

for name, param in model.named_parameters():
    if not param.requires_grad:
        print(name)

In [17]:
model.train()  # Sets training mode
FastLanguageModel.for_training(model)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")

print(trainable_params / total_params)

Total parameters: 675,994,752
Trainable parameters: 339,057,792
0.5015686749000087


#Load Dataset

In [None]:
dataset = load_dataset('json', data_files = {'train': '/content/drive/MyDrive/DPO/DPO on Colab/gemma3_dpo_scored_data.jsonl'})

ex = dataset['train'][0]

prompt = ex['prompt']

print(prompt)
print('-----------------------')

inputs = tokenizer_b(prompt, return_tensors="pt").to(device = "cuda")
inputs['attention_mask'] = inputs['attention_mask'].to(dtype = torch.bfloat16)
print(inputs['input_ids'].dtype, inputs['attention_mask'].dtype)

# print(next(model.parameters()).dtype)
# device = next(model.parameters()).device
# inputs = {k: v.to(device=device) for k, v in inputs.items()}

with torch.no_grad():
  chosen_outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    return_dict_in_generate = True,
    output_logits = True
  )

  rejected_outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    return_dict_in_generate = True,
    output_logits = True
  )

  logits = outputs.logits
  chosen_reward = ex["chosen_rewards"]
  rejected_rewards = ex["rejected_rewards"]




  print(outputs)
  print(tokenizer_b.decode(outputs[0], skip_special_tokens = True))


Question: A 5-year-old girl is brought to the physician because of a 2-day history of redness and foreign body sensation in both eyes. She has not had vision loss. Her mother reports that she has also had violent coughing spells followed by a high-pitched inspiratory sound during this time. For the past week, she has had low-grade fevers and a runny nose. Her only vaccinations were received at birth. Her temperature is 37.7°C (99.9°F). Examination shows conjunctival hemorrhage and petechiae. Oropharyngeal examination shows no abnormalities. Which of the following is the most appropriate pharmacotherapy?

Options:
A. Topical azithromycin
B. Oral azithromycin
C. Artificial tears
D. Topical tobramycin

Choose the best answer and provide a step-by-step explanation for your choice.
-----------------------
torch.int64 torch.bfloat16
Question: A 5-year-old girl is brought to the physician because of a 2-day history of redness and foreign body sensation in both eyes. She has not had vision los

In [None]:
print(tokenizer_test.decode(outputs.sequences[0], skip_special_tokens = True))

#_variableption_SE];
dis85� Al 
 ex.security.S\\ ex scre’strty fluidPassword_point¯¯¯¯etwork�rit.execute daylight��rit preventoin_INT.security daylight�ne_pointtring.htmdiv847’ ex normalail умialustomer


In [None]:
#

for name, param in model.named_parameters():
  if param.dtype == 'torch.float16':
      print(f"{name}: {param.dtype}")

tokenizer = AutoTokenizer.from_pretrained(model_name)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

input_ids = inputs['input_ids']

inputs["attention_mask"] = inputs["attention_mask"].to(dtype=torch.bfloat16)
attention_mask = inputs['attention_mask']

print(input_ids.dtype, attention_mask.dtype)

model.eval()

with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

print(logits)

torch.int64 torch.bfloat16


RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half