In [98]:
import datasets
import seaborn as sns
sns.set_style("whitegrid")
import matplotlib.pyplot as plt
import numpy as np
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from torch.nn import Module, Linear, BatchNorm1d, Dropout, BCEWithLogitsLoss

In [2]:
def remove_extra_brackets(input: str) -> str:
    text = input[2:-2]
    text = text.strip()
    return text

In [3]:
# Load multiple CSV files
df = datasets.load_dataset('csv', data_files={
    'train': './data/train.csv',
    'test': './data/test.csv'
})

In [4]:
def fix_dataset(row):
    cleaned_prompt = remove_extra_brackets(row['prompt'])
    cleaned_response_a = remove_extra_brackets(row['response_a'])
    cleaned_response_b = remove_extra_brackets(row['response_b'])
    return {
        "response_a": cleaned_response_a,
        "response_b": cleaned_response_b,
        "prompt": cleaned_prompt,
    }

In [5]:
df = df.map(fix_dataset, batched=False).remove_columns(['id', 'model_a', 'model_b'])

In [7]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B-Instruct-2507")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-4B-Instruct-2507", device_map="cuda", dtype=torch.bfloat16)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [49]:
class PreferenceHead(Module):
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(2560, 1280)
        self.linear2 = Linear(1280, 640)
        self.linear3 = Linear(640, 320)
        self.linear4 = Linear(320, 3)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        x = self.linear4(x)
        return x[:, -1]

In [50]:
model.lm_head = PreferenceHead().to("cuda", dtype=torch.bfloat16)

In [93]:
text = df["train"]["response_a"][0:2]

In [94]:
encoded_text = tokenizer(text, return_tensors="pt", padding="longest")["input_ids"]

In [95]:
encoded_text.shape

torch.Size([2, 921])

In [96]:
with torch.no_grad():
    output = model(encoded_text.to("cuda"))

In [99]:
loss_fn = BCEWithLogitsLoss()

In [103]:
predicitons = output.logits.cpu()
predicitons

tensor([[ 0.3340, -0.3027,  0.0488],
        [-0.9531,  0.1289, -0.0835]], dtype=torch.bfloat16)

In [107]:
targets = torch.tensor(
    [
        [1,0,0],
        [0,1,0]
    ], dtype=torch.float16
)

In [108]:
targets

tensor([[1., 0., 0.],
        [0., 1., 0.]], dtype=torch.float16)

In [110]:
loss_fn(targets, predicitons)

tensor(0.8242, dtype=torch.bfloat16)