In [1]:
import numpy as np

import os
# Print the full path of every file available under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))
import pandas as pd

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorWithPadding,
)
from torch.utils.data import DataLoader
import numpy as np
import torch
import datasets
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
from torch.nn.functional import softmax

/kaggle/input/llm-classification-finetuning/sample_submission.csv
/kaggle/input/llm-classification-finetuning/train.csv
/kaggle/input/llm-classification-finetuning/test.csv
/kaggle/input/llm-preference-user-classification/__results__.html
/kaggle/input/llm-preference-user-classification/__huggingface_repos__.json
/kaggle/input/llm-preference-user-classification/__notebook__.ipynb
/kaggle/input/llm-preference-user-classification/__output__.json
/kaggle/input/llm-preference-user-classification/custom.css
/kaggle/input/llm-preference-user-classification/full_offline_model/model.safetensors.index.json
/kaggle/input/llm-preference-user-classification/full_offline_model/config.json
/kaggle/input/llm-preference-user-classification/full_offline_model/merges.txt
/kaggle/input/llm-preference-user-classification/full_offline_model/model-00001-of-00002.safetensors
/kaggle/input/llm-preference-user-classification/full_offline_model/model-00002-of-00002.safetensors
/kaggle/input/llm-preference-user-

2026-01-03 16:48:53.667267: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767458933.819202      24 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767458933.861335      24 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767458934.224253      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767458934.224290      24 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767458934.224294      24 computation_placer.cc:177] computation placer alr

In [2]:
# Local directory containing the checkpoint shards and tokenizer files.
model_path = "/kaggle/input/llm-preference-user-classification/full_offline_model"

# Tokenizer is read from disk only; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
tokenizer.pad_token = tokenizer.eos_token

# Three-label sequence classifier loaded in bfloat16 onto the GPU, from local files only.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=3,
    dtype=torch.bfloat16,
    device_map="cuda",
    attn_implementation="sdpa",
    local_files_only=True,
)

# Point the model's pad id at its EOS id.
model.config.pad_token_id = model.config.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Only the competition's test CSV is loaded here, exposed as the "test" split.
# The training CSV (/kaggle/input/llm-classification-finetuning/train.csv) is not read in this notebook.
df = datasets.load_dataset(
    "csv",
    data_files=dict(test="/kaggle/input/llm-classification-finetuning/test.csv"),
)

Generating test split: 0 examples [00:00, ? examples/s]

In [4]:
def remove_extra_brackets(text: str) -> str:
    """Strip surrounding whitespace and one enclosing ``['...']`` wrapper.

    Args:
        text: Raw cell value. Anything that is not a ``str`` is treated as missing.

    Returns:
        ``""`` for non-string input. Otherwise the stripped text, with the
        leading ``['`` and trailing ``']`` removed when both are present and
        the stripped text is longer than four characters.
    """
    if not isinstance(text, str):
        return ""

    stripped = text.strip()
    is_wrapped = stripped[:2] == "['" and stripped[-2:] == "']"
    # The length guard leaves the bare wrapper "['']" (and anything shorter) untouched.
    if is_wrapped and len(stripped) > 4:
        return stripped[2:-2]
    return stripped

# Prompt template for the three-way preference classification task.
# It is filled in with str.format using the `prompt`, `response_a` and
# `response_b` placeholders.
# NOTE(review): this text is runtime input to the model. The phrase
# "The task it that" looks like a typo, but presumably the checkpoint was
# fine-tuned on this exact wording -- confirm before correcting it.
CLASSIFICATION_PROMPT = """
You are an expert AI assistant that is specialized in selecting user preferences.
The task it that you are provided with a prompt and two responses (A and B) to that prompt from different LLMs.
The possible outcomes are 3 classes:
- Winner A: Response A is better
- Winner B: Response B is better
- Tie: Both responses are equally good

The prompt to the models is:
```
{prompt}
```

Then response A is:
```
{response_a}
```

And response B is:
```
{response_b}
```

Based on the above, classify which response is better by choosing one of the following options: "Winner A", "Winner B", or "Tie".
"""

In [5]:
def fix_dataset(row, max_length=30000):
    """Build the classification prompt for one row and tokenize it.

    Args:
        row: Mapping with "prompt", "response_a" and "response_b" entries.
        max_length: Maximum number of tokens passed to the tokenizer; truncation
            is enabled, so longer prompts are cut. Defaults to 30000, the value
            that used to be hard-coded, so ``df.map(fix_dataset)`` behaves as
            before. Override it through ``map(..., fn_kwargs={"max_length": n})``.

    Returns:
        The tokenizer output for the formatted prompt.
    """
    # Clean each text field, then fill the shared template.
    full_prompt = CLASSIFICATION_PROMPT.format(
        prompt=remove_extra_brackets(row["prompt"]),
        response_a=remove_extra_brackets(row["response_a"]),
        response_b=remove_extra_brackets(row["response_b"]),
    )

    return tokenizer(full_prompt, truncation=True, max_length=max_length)

# Tokenize one row at a time, then drop the raw text columns that the
# tokenized prompt replaces.
df = df.map(fix_dataset, batched=False)
df = df.remove_columns(["prompt", "response_a", "response_b"])

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [6]:
# Select the test split and have it return torch tensors.
df = df["test"].with_format("torch")

# Collator that pads the tokenized rows of a batch with the tokenizer's settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=True)

# One example per batch, in file order.
dataloader = DataLoader(
    dataset=df,
    collate_fn=data_collator,
    batch_size=1,
    shuffle=False,
)

In [None]:
# Set model to evaluation mode
model.eval()

# Accumulators: one id and one per-class probability vector per test row.
ids = []
predictions = []

print(f"Processing {len(dataloader)} batches...")

# Gradients are never needed for inference, so disable them once for the whole loop.
with torch.no_grad():
    for i, data in enumerate(tqdm(dataloader)):
        # "id" is bookkeeping, not a model input; pull it out before the forward pass.
        batch_ids = data.pop("id").cpu().tolist()

        # Move the model inputs to the GPU.
        input_ids = data["input_ids"].to("cuda")
        attention_mask = data["attention_mask"].to("cuda")

        # Pass both input_ids AND attention_mask.
        with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
            logits = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            ).logits

        # Upcast explicitly: the model is loaded in bfloat16 and NumPy cannot
        # represent bfloat16, so the conversion must not rely on autocast
        # happening to promote softmax to float32.
        # Softmax over the last dimension works for any batch size.
        probs = softmax(logits.float(), dim=-1).cpu().numpy()

        ids.extend(batch_ids)
        predictions.extend(probs)

        # Clear cache periodically
        if (i + 1) % 100 == 0:
            torch.cuda.empty_cache()

print(f"Processed {len(ids)} samples")

# Create submission dataframe
# reshape(-1, 3) is a no-op for the usual (N, 3) stack of probability rows,
# but keeps the array two-dimensional when there are no predictions, so the
# column slicing below does not raise IndexError on an empty test set.
predictions = np.asarray(predictions).reshape(-1, 3)

prob_cols = ['winner_model_a', 'winner_model_b', 'winner_tie']

submission_df = pd.DataFrame({
    'id': ids,
    'winner_model_a': predictions[:, 0],
    'winner_model_b': predictions[:, 1],
    'winner_tie': predictions[:, 2]
})

# Ensure IDs are integers (single conversion; the list itself is left untouched).
submission_df['id'] = submission_df['id'].astype(int)

# Verify probabilities sum to 1
row_sums = submission_df[prob_cols].sum(axis=1)

# Check if any row doesn't sum to ~1.0
if not np.allclose(row_sums, 1.0, atol=0.01):
    print(f"WARNING: Some probabilities don't sum to 1. Min: {row_sums.min()}, Max: {row_sums.max()}")
    # Normalize just to be safe: divide every probability column by its row total.
    submission_df[prob_cols] = submission_df[prob_cols].div(row_sums, axis=0)

# Sort by ID
submission_df = submission_df.sort_values('id').reset_index(drop=True)

# Save submission
submission_df.to_csv('/kaggle/working/submission.csv', index=False)

100%|██████████| 3/3 [00:04<00:00,  1.54s/it]
