In [1]:
import ast
import json

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import tensorflow as tf
import keras_nlp

from tqdm import tqdm


2025-07-15 05:19:53.803094: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752556794.007352      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752556794.060460      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Configuration

In [2]:
class CFG:
    """Notebook-wide settings plus the lookups between class ids and label names."""

    # Run settings: random seed, KerasNLP preset name, tokenizer sequence length.
    seed = 42
    preset = "deberta_v3_base_en"
    sequence_length = 512

    # Integer class id -> name of the corresponding target column.
    label2name = {
        0: 'winner_model_a',
        1: 'winner_model_b',
        2: 'winner_tie',
    }

    # Ids and names as parallel lists (dict insertion order), and the reverse lookup.
    class_labels = [*label2name]
    class_names = [*label2name.values()]
    name2label = dict(zip(class_names, class_labels))




## Load & Preprocess Data


In [3]:
train_df = pd.read_csv("/kaggle/input/llm-classification-finetuning/train.csv")
test_df = pd.read_csv("/kaggle/input/llm-classification-finetuning/test.csv")


def _first_turn(raw):
    """Return the first entry of a JSON-encoded list of strings.

    The text columns hold list literals that can contain JSON ``null``
    (the earlier eval-based parsing had to patch "null" out of the raw text
    first, which also rewrote any ordinary word containing "null").
    ``json.loads`` parses them directly, never executes dataset text as code,
    and decodes JSON escapes as JSON defines them.

    Parameters
    ----------
    raw : str
        JSON text of a list, e.g. '["first turn", "second turn"]'.

    Returns
    -------
    str
        The first element, or "" when that element is null or the list is empty.
    """
    turns = json.loads(raw)
    first = turns[0] if turns else None
    return "" if first is None else first


# Columns that store one JSON list per row; only the first turn is kept.
TEXT_COLUMNS = ["prompt", "response_a", "response_b"]

## training data
for column in TEXT_COLUMNS:
    train_df[column] = train_df[column].map(_first_turn)

# Label conversion: the target column holding the row maximum names the class,
# which is then mapped to its integer id.
train_df["class_name"] = train_df[["winner_model_a", "winner_model_b" , "winner_tie"]].idxmax(axis=1)
train_df["class_label"] = train_df.class_name.map(CFG.name2label)

## testing data
for column in TEXT_COLUMNS:
    test_df[column] = test_df[column].map(_first_turn)


In [4]:
def make_pairs(row):
    """Build the two "Prompt / Response" texts for one comparison row.

    Parameters
    ----------
    row : pandas.Series
        One DataFrame row providing ``prompt``, ``response_a`` and
        ``response_b``.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with two extra entries:

        * ``encode_fail`` -- True when at least one of the three fields could
          not be round-tripped through UTF-8 (or was not a string at all);
          every such field is replaced by "".
        * ``options`` -- ``[text_for_response_a, text_for_response_b]``.
    """
    def _utf8_or_blank(value):
        # Returns (text, failed). Strings with unencodable code points (e.g.
        # lone surrogates) raise UnicodeError; non-string cells such as NaN
        # have no .encode and raise AttributeError. Anything else is a real
        # error and is allowed to propagate instead of being swallowed.
        try:
            return value.encode("utf-8").decode("utf-8"), False
        except (UnicodeError, AttributeError):
            return "", True

    # Work on a copy: DataFrame.apply does not support functions that mutate
    # the object they are handed.
    row = row.copy()

    prompt, prompt_failed = _utf8_or_blank(row.prompt)
    response_a, a_failed = _utf8_or_blank(row.response_a)
    response_b, b_failed = _utf8_or_blank(row.response_b)

    row["encode_fail"] = prompt_failed or a_failed or b_failed
    row["options"] = [
        f"Prompt: {prompt}\n\nResponse: {response_a}",  # Response from Model A
        f"Prompt: {prompt}\n\nResponse: {response_b}",  # Response from Model B
    ]
    return row


# Add the `encode_fail` and `options` columns to both frames, one row at a time.
train_df = train_df.apply(make_pairs, axis="columns")
test_df = test_df.apply(make_pairs, axis="columns")

# Preview the first two rows of each frame (train first, then test).
display(train_df.head(2), test_df.head(2))

Unnamed: 0,id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie,class_name,class_label,encode_fail,options
0,30192,gpt-4-1106-preview,gpt-4-0613,Is it morally right to try to have a certain p...,The question of whether it is morally right to...,"As an AI, I don't have personal beliefs or opi...",1,0,0,winner_model_a,0,False,[Prompt: Is it morally right to try to have a ...
1,53567,koala-13b,gpt-4-0613,What is the difference between marriage licens...,A marriage license is a legal document that al...,A marriage license and a marriage certificate ...,0,1,0,winner_model_b,1,False,[Prompt: What is the difference between marria...


Unnamed: 0,id,prompt,response_a,response_b,encode_fail,options
0,136060,"I have three oranges today, I ate an orange ye...",You have two oranges today.,You still have three oranges. Eating an orange...,False,"[Prompt: I have three oranges today, I ate an ..."
1,211333,You are a mediator in a heated political debat...,Thank you for sharing the details of the situa...,Mr Reddy and Ms Blue both have valid points in...,False,[Prompt: You are a mediator in a heated politi...


## Load Model

In [5]:
# Load the DeBERTa-v3 encoder and its matching preprocessor from the preset
# named in CFG; the preprocessor is configured with CFG.sequence_length.
_deberta = keras_nlp.models
backbone = _deberta.DebertaV3Backbone.from_preset(CFG.preset)
preprocessor = _deberta.DebertaV3Preprocessor.from_preset(
    CFG.preset,
    sequence_length=CFG.sequence_length,
)


I0000 00:00:1752556861.258113      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


## Embedding Extraction Function

In [6]:
def extract_cls_embeddings(text_list, batch_size=32):
    """Encode texts with the notebook's `preprocessor` and `backbone`.

    The texts are processed in consecutive slices of `batch_size`. For each
    slice, the backbone output at sequence position 0 is kept (the slot the
    function name refers to as CLS) and converted to a NumPy array.

    Parameters
    ----------
    text_list : list
        Texts to encode; sliced with ``text_list[start:start + batch_size]``.
    batch_size : int, default 32
        Number of texts sent through the model per step.

    Returns
    -------
    numpy.ndarray
        All per-batch arrays stacked row-wise: (total_samples, hidden_dim).
    """
    batch_starts = range(0, len(text_list), batch_size)
    chunks = []
    for start in tqdm(batch_starts):
        model_inputs = preprocessor(text_list[start:start + batch_size])
        hidden_states = backbone(model_inputs)      # (batch, seq_len, hidden_dim)
        first_position = hidden_states[:, 0, :]     # (batch, hidden_dim)
        chunks.append(first_position.numpy())
    return np.vstack(chunks)

## Generate Features

In [7]:
# Embed the "response A" text and the "response B" text of every training row.
train_options = train_df["options"]
emb_a = extract_cls_embeddings([pair[0] for pair in train_options])
emb_b = extract_cls_embeddings([pair[1] for pair in train_options])



100%|██████████| 1797/1797 [36:48<00:00,  1.23s/it]
100%|██████████| 1797/1797 [37:07<00:00,  1.24s/it]


In [8]:
# Feature row = [A, B, A - B, A * B], joined along the column axis.
pair_feature_parts = (emb_a, emb_b, emb_a - emb_b, emb_a * emb_b)
X_train = np.hstack(pair_feature_parts)
y_train = train_df["class_label"].to_numpy()


In [9]:
# Sanity check: print the shapes of the feature matrix and the label vector.
for caption, array in (("X_train shape:", X_train), ("y_train shape:", y_train)):
    print(caption, array.shape)


X_train shape: (57477, 3072)
y_train shape: (57477,)


## Train Logistic Regression

In [10]:
# Hold out 20% of the rows for validation. The split gets its own names so the
# full X_train / y_train stay intact and this cell can be re-run safely
# (re-splitting an already-split X_train would silently shrink the data).
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=CFG.seed
)

clf = LogisticRegression(
    C=0.5,
    class_weight='balanced',
    solver='saga',
    penalty='l2',
    max_iter=3000,
    random_state=CFG.seed,  # saga shuffles the data; seed it so the fit is repeatable
)

clf.fit(X_fit, y_fit)

# Validation metrics on the held-out rows.
y_pred = clf.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred, target_names=["A wins", "B wins", "Tie"]))


Accuracy: 0.47390396659707723
              precision    recall  f1-score   support

      A wins       0.50      0.49      0.50      4030
      B wins       0.50      0.49      0.50      3929
         Tie       0.42      0.44      0.43      3537

    accuracy                           0.47     11496
   macro avg       0.47      0.47      0.47     11496
weighted avg       0.47      0.47      0.47     11496



In [11]:
# Embed the test pairs and build [A, B, A - B, A * B] feature rows,
# using the same column layout as the training features.
test_options = test_df["options"]
test_emb_a = extract_cls_embeddings([pair[0] for pair in test_options])
test_emb_b = extract_cls_embeddings([pair[1] for pair in test_options])

X_test = np.hstack((
    test_emb_a,
    test_emb_b,
    test_emb_a - test_emb_b,
    test_emb_a * test_emb_b,
))

100%|██████████| 1/1 [00:00<00:00,  2.71it/s]
100%|██████████| 1/1 [00:00<00:00,  2.71it/s]


In [12]:
# Hard class predictions for the test features: one integer class id per row
# (the ids are the keys of CFG.label2name).
test_preds = clf.predict(X_test)


In [13]:
# Translate each predicted class id into its label name.
test_labels = list(map(CFG.label2name.__getitem__, test_preds))


In [14]:
# Write per-class probabilities, one column per outcome, named like the
# training targets (winner_model_a / winner_model_b / winner_tie).
# NOTE(review): the competition is understood to be scored with log loss on
# these three probability columns rather than on a single hard label --
# confirm the exact header against sample_submission.csv.
test_proba = clf.predict_proba(X_test)

# predict_proba columns follow clf.classes_, so name them through that order
# instead of assuming it matches CFG.class_names.
proba_columns = [CFG.label2name[label] for label in clf.classes_]

submission = pd.DataFrame(test_proba, columns=proba_columns)
submission.insert(0, "id", test_df["id"].to_numpy())
submission = submission[["id"] + CFG.class_names]
submission.to_csv("submission.csv", index=False)
