In [10]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, set_seed
import os
set_seed(42)



In [11]:
from kaggle_secrets import UserSecretsClient
import wandb

# Access the secret from Kaggle Secrets
user_secrets = UserSecretsClient()
wandb_key = user_secrets.get_secret("WANDB_API_KEY")

# Set it as an environment variable
os.environ["WANDB_API_KEY"] = wandb_key

# Login to wandb
wandb.login()


True

In [12]:
train = pd.read_csv("/kaggle/input/finetuning/train.csv")
test = pd.read_csv("/kaggle/input/finetuning/test.csv")
# Start with a smaller model to test
BASE_MODEL = "bert-base-uncased"  

# Later try:
# BASE_MODEL = "roberta-base"
# BASE_MODEL = "roberta-large"
# BASE_MODEL = "microsoft/deberta-v3-large"
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=3)



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
def get_label(row):
    if row["winner_model_a"] == 1:
        return 0
    elif row["winner_model_b"] == 1:
        return 1
    else:
        return 2

train["label"] = train.apply(get_label, axis=1)
train["text_a"] = train["prompt"].astype(str) + " [RESP_A] " + train["response_a"].astype(str)
train["text_b"] = train["prompt"].astype(str) + " [RESP_B] " + train["response_b"].astype(str)


In [14]:
from transformers import AutoTokenizer
from torch.utils.data import Dataset

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

class PreferenceDataset(Dataset):
    def __init__(self, df, tokenizer, max_len=256):
        self.text_a = df["text_a"].tolist()
        self.text_b = df["text_b"].tolist()
        self.labels = df["label"].tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        encoding = self.tokenizer(
            self.text_a[idx],
            self.text_b[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            "input_ids": encoding["input_ids"].flatten(),
            "attention_mask": encoding["attention_mask"].flatten(),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }

dataset = PreferenceDataset(train, tokenizer)


In [15]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(train, test_size=0.1, stratify=train["label"], random_state=42)

train_ds = PreferenceDataset(train_df, tokenizer)
val_ds = PreferenceDataset(val_df, tokenizer)


In [16]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted")
    return {"accuracy": acc, "f1": f1}


In [22]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)
                                                          
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    report_to="wandb",
    run_name="bert-finetune-preference",
    fp16=True
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


In [23]:
trainer.train()




Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.0577,1.042735,0.463814,0.460833
2,1.0254,1.04399,0.467815,0.463599
3,0.949,1.087812,0.450592,0.45044




TrainOutput(global_step=9702, training_loss=1.0212282468991976, metrics={'train_runtime': 6602.5338, 'train_samples_per_second': 23.504, 'train_steps_per_second': 1.469, 'total_flos': 2.041589097847757e+16, 'train_loss': 1.0212282468991976, 'epoch': 3.0})

In [25]:

# Assuming test_ds is a Hugging Face Dataset or list of texts
predictions = trainer.predict(test).predictions
predicted_classes = predictions.argmax(axis=1)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, return_tensors="pt")


KeyError: 0