In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Setup

In [2]:
!pip install unsloth
!pip install transformers peft accelerate safetensors

from unsloth import FastLanguageModel, FastModel
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
True
Tesla T4


# Define Model

unsloth from gemma baseline

In [3]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",  # base model
    max_seq_length = 2048,
    load_in_4bit = True,  # or False if you have enough VRAM
    use_exact_model_name = True
)

model.load_adapter('/content/drive/MyDrive/DPO on Colab/lora_adapter')

#FastLanguageModel.for_training(model_b)

==((====))==  Unsloth 2025.4.7: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


# Parameter Count

In [4]:
for param in model.parameters():
    if param.dtype.is_floating_point or param.is_complex():
        param.requires_grad = True

for name, param in model.named_parameters():
    if not param.requires_grad:
        print(name)

model.layers.0.self_attn.q_proj.base_layer.weight
model.layers.0.self_attn.k_proj.base_layer.weight
model.layers.0.self_attn.v_proj.base_layer.weight
model.layers.0.self_attn.o_proj.base_layer.weight
model.layers.0.mlp.gate_proj.base_layer.weight
model.layers.0.mlp.up_proj.base_layer.weight
model.layers.0.mlp.down_proj.base_layer.weight
model.layers.1.self_attn.q_proj.base_layer.weight
model.layers.1.self_attn.k_proj.base_layer.weight
model.layers.1.self_attn.v_proj.base_layer.weight
model.layers.1.self_attn.o_proj.base_layer.weight
model.layers.1.mlp.gate_proj.base_layer.weight
model.layers.1.mlp.up_proj.base_layer.weight
model.layers.1.mlp.down_proj.base_layer.weight
model.layers.2.self_attn.q_proj.base_layer.weight
model.layers.2.self_attn.k_proj.base_layer.weight
model.layers.2.self_attn.v_proj.base_layer.weight
model.layers.2.self_attn.o_proj.base_layer.weight
model.layers.2.mlp.gate_proj.base_layer.weight
model.layers.2.mlp.up_proj.base_layer.weight
model.layers.2.mlp.down_proj.b

In [5]:
model.train()  # Sets training mode
FastLanguageModel.for_training(model)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")

print(trainable_params / total_params)

Total parameters: 675,994,752
Trainable parameters: 339,057,792
0.5015686749000087


#Load Dataset

In [13]:
import json
from torch.utils.data import Dataset, DataLoader

class JSONLDataset(Dataset):
    def __init__(self, filepath):
        self.samples = []
        with open(filepath, 'r') as f:
            for line in f:
                self.samples.append(json.loads(line))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        prompt = sample['prompt']
        chosen = prompt + sample['chosen_response']
        rejected = prompt + sample['rejected_response']
        return {
            'prompt_chosen_response': chosen,
            'prompt_rejected_chosen_response': rejected,
            'chosen_scores': sample['chosen_scores'],
            'rejected_scores': sample['rejected_scores'],
        }

In [14]:
dataset = JSONLDataset('/content/drive/MyDrive/DPO on Colab/gemma3_dpo_scored_data.jsonl')
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

for batch in dataloader:
    print(batch['prompt_chosen_response'])
    print(batch['chosen_scores'])
    break

["Question: A 54-year-old woman presents with increasing shortness of breath on exertion for the past few months. She also complains of associated fatigue and some balance issues. The patient denies swelling of her feet and difficulty breathing at night or while lying down. Physical examination is significant for conjunctival pallor. A peripheral blood smear reveals macrocytosis and hypersegmented granulocytes. Which of the following substances, if elevated in this patient’s blood, would support the diagnosis of vitamin B12 deficiency?\n\nOptions:\nA. Methionine\nB. Cysteine\nC. Homocysteine\nD. Methylmalonyl-CoA\n\nChoose the best answer and provide a step-by-step explanation for your choice.D. Methylmalonyl-CoA\nExplanation: Elevated methylmalonyl-CoA supports vitamin B12 deficiency, as B12 is required for its conversion to succinyl-CoA. The patient's macrocytic anemia and hypersegmented neutrophils indicate impaired DNA synthesis due to B12 deficiency."]
{'accuracy': tensor([1.], dt

In [15]:
def train(model, tokenizer, dataloader, optimizer, epochs = 1):
    model.train()

    loss_fn = MedDPOLoss()

    for epoch in range(epochs):
      for batch in dataloader:
        optimizer.zero_grad()

        chosen_inputs = tokenizer(batch['prompt_chosen_response'], return_tensors='pt').to(device = 'cuda')
        chosen_rewards = batch['chosen_rewards']

        rejected_inputs = tokenizer(batch['rejected_chosen_response'], return_tensors='pt').to(device = 'cuda')
        rejected_rewards = batch['rejected_rewards']

        chosen_logits = model.generate(
            **chosen_inputs,  max_new_tokens=128,
            return_dict_in_generate = True,
            output_logits = True
        ).logits

        rejected_logits = model.generate(
            **rejected_inputs,  max_new_tokens=128,
            return_dict_in_generate = True,
            output_logits = True
        ).logits

        loss = loss_fn(chosen_logits, rejected_logits,
                       chosen_rewards, rejected_rewards)




In [8]:
dataset = load_dataset('json', data_files = {'train': '/content/drive/MyDrive/DPO on Colab/gemma3_dpo_scored_data.jsonl'})

ex = dataset['train'][0]

prompt = ex['prompt']

print(prompt)

print(ex['chosen_scores'])

print('-----------------------')

inputs = tokenizer(prompt, return_tensors="pt").to(device = "cuda")

# inputs['attention_mask'] = inputs['attention_mask'].to(dtype = torch.bfloat16)
# print(inputs['input_ids'].dtype, inputs['attention_mask'].dtype)


with torch.no_grad():

  chosen_outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    return_dict_in_generate = True,
    output_logits = True
  )

  rejected_outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    return_dict_in_generate = True,
    output_logits = True
  )

  logits = outputs.logits
  chosen_reward = ex["chosen_rewards"]
  rejected_rewards = ex["rejected_rewards"]




  print(outputs)
  print(tokenizer_b.decode(outputs[0], skip_special_tokens = True))


Question: A 5-year-old girl is brought to the physician because of a 2-day history of redness and foreign body sensation in both eyes. She has not had vision loss. Her mother reports that she has also had violent coughing spells followed by a high-pitched inspiratory sound during this time. For the past week, she has had low-grade fevers and a runny nose. Her only vaccinations were received at birth. Her temperature is 37.7°C (99.9°F). Examination shows conjunctival hemorrhage and petechiae. Oropharyngeal examination shows no abnormalities. Which of the following is the most appropriate pharmacotherapy?

Options:
A. Topical azithromycin
B. Oral azithromycin
C. Artificial tears
D. Topical tobramycin

Choose the best answer and provide a step-by-step explanation for your choice.
{'accuracy': 2.0, 'safety': 3.0, 'explanation_depth': 2.0}
-----------------------


In [None]:
print(tokenizer_test.decode(outputs.sequences[0], skip_special_tokens = True))

#_variableption_SE];
dis85� Al 
 ex.security.S\\ ex scre’strty fluidPassword_point¯¯¯¯etwork�rit.execute daylight��rit preventoin_INT.security daylight�ne_pointtring.htmdiv847’ ex normalail умialustomer


In [None]:
for name, param in model.named_parameters():
  if param.dtype == 'torch.float16':
      print(f"{name}: {param.dtype}")

tokenizer = AutoTokenizer.from_pretrained(model_name)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

input_ids = inputs['input_ids']

inputs["attention_mask"] = inputs["attention_mask"].to(dtype=torch.bfloat16)
attention_mask = inputs['attention_mask']

print(input_ids.dtype, attention_mask.dtype)

model.eval()

with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

print(logits)

torch.int64 torch.bfloat16


RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half