# 3-class classification solution

Start by preparing the dataset:

In [5]:
from cProfile import label

import pandas as pd

# Load ../../data/cleaned_train.csv into the working DataFrame `df`.
df = pd.read_csv(filepath_or_buffer="../../data/cleaned_train.csv")

def get_value(row: pd.Series) -> int:
    """Map one row's winner flags to a single class index.

    Args:
        row: A DataFrame row (or any mapping) exposing the keys
            ``winner_model_a`` and ``winner_model_b``.

    Returns:
        0 when ``winner_model_a`` equals 1, 1 when ``winner_model_b`` equals 1,
        and 2 in every other case.
    """
    # Model A is tested first, so it takes precedence if both flags are set.
    if row['winner_model_a'] == 1:
        return 0
    if row['winner_model_b'] == 1:
        return 1
    # Neither flag is set — presumably a tie; confirm against the dataset schema.
    return 2

# Row-wise pass (axis="columns" is pandas' alias for axis=1): one class index per row.
df["label"] = df.apply(get_value, axis="columns")

def gen_input(row):
    """Build the single text string fed to the tokenizer for one row.

    Args:
        row: A DataFrame row (or any mapping) with the keys ``prompt``,
            ``response_a`` and ``response_b``.

    Returns:
        The three fields joined into one string, each preceded by its
        ``[PROMPT]`` / ``[RESPONSE_A]`` / ``[RESPONSE_B]`` marker.
    """
    prompt = row['prompt']
    first_reply = row['response_a']
    second_reply = row['response_b']
    return f"[PROMPT] {prompt} [RESPONSE_A] {first_reply} [RESPONSE_B] {second_reply}"

# Build the model input text for every row, then show labels next to inputs.
df["input"] = df.apply(gen_input, axis="columns")
df.loc[:, ["label", "input"]]

Unnamed: 0,label,input
0,0,"[PROMPT] [""Is it morally right to try to have ..."
1,1,"[PROMPT] [""What is the difference between marr..."
2,2,"[PROMPT] [""explain function calling. how would..."
3,0,"[PROMPT] [""How can I create a test set for a v..."
4,1,"[PROMPT] [""What is the best way to travel from..."
...,...,...
57472,0,"[PROMPT] [""A simple mnemonic for \u03c0:\n\""Ho..."
57473,0,"[PROMPT] [""In python, implement a naive Bayes ..."
57474,0,"[PROMPT] [""is it unethical to work on building..."
57475,1,"[PROMPT] [""If a bait contains 0,0025% bromadio..."


Then we apply a model from the `transformers` library.

First, we tokenize the input text, i.e. convert it into the numeric token IDs that BERT can understand.

In [6]:
from transformers import BertTokenizer

# Tokenizer that matches the "bert-base-uncased" checkpoint.
tknzr = BertTokenizer.from_pretrained("bert-base-uncased")

# Encode every input string in one call: sequences are capped at 512 tokens,
# padded, and returned as PyTorch tensors.
input_texts = df["input"].tolist()
encodings = tknzr(
    input_texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="pt",
)

Second, we create a dataset object for PyTorch:

In [7]:
import torch
from torch.utils.data import Dataset

class LLMPDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from field name (e.g. input_ids, attention_mask)
            to an indexable collection holding one entry per sample.
        labels: Indexable collection of labels, one per sample.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of labels, which is used as the dataset size."""
        return len(self.labels)

    def __getitem__(self, i):
        """Return sample ``i`` as a dict of its encoding fields plus ``labels``."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[i]
        # The label is wrapped in a tensor and stored under the "labels" key.
        sample["labels"] = torch.tensor(self.labels[i])
        return sample

# Wrap the tokenized inputs and the integer labels into one dataset object.
dst = LLMPDataset(encodings=encodings, labels=df["label"].tolist())

Load a pre-trained model:

In [8]:
from transformers import BertForSequenceClassification

# Pre-trained "bert-base-uncased" weights with a classification head sized for 3 labels.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=3,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Set training arguments:

In [9]:
# Sanity check: show which accelerate version this kernel has installed
# (presumably because the Hugging Face Trainer depends on it — confirm).
import accelerate
print(accelerate.__version__)

0.26.0


In [10]:
from transformers import TrainingArguments
import accelerate

# Hyper-parameters handed to the Hugging Face Trainer.
train_arg = TrainingArguments(
    # Where training outputs and logs are written, and how often to log.
    output_dir="./train_result",
    logging_dir="./logs",
    logging_steps=10,
    # Number of training epochs and the weight-decay regularization strength.
    num_train_epochs=3,
    weight_decay=0.01,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluation strategy: "epoch".
    eval_strategy="epoch",
)

Then, train the model:

In [11]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://download.pytorch.org/whl/cu118



[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import torch
# Report whether PyTorch can see a CUDA device; True is expected on a GPU machine.
print(torch.cuda.is_available())
# Only query the device name when CUDA is present: get_device_name(0) raises
# on a CPU-only machine instead of printing anything.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")

True
NVIDIA GeForce RTX 4060 Laptop GPU


In [None]:
from transformers import Trainer

# Hold out part of the data for evaluation. Passing the same dataset as both
# train_dataset and eval_dataset would score the model on the very examples it
# was trained on, so the per-epoch validation loss would say nothing about
# generalization.
EVAL_FRACTION = 0.1
n_eval = max(1, int(len(dst) * EVAL_FRACTION))
n_train = len(dst) - n_eval
# A seeded generator keeps the split identical across re-runs.
train_split, eval_split = torch.utils.data.random_split(
    dst,
    [n_train, n_eval],
    generator=torch.Generator().manual_seed(42),
)

trainer = Trainer(
    model=model,
    args=train_arg,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Fine-tune the model with the settings in train_arg.
trainer.train()

Epoch,Training Loss,Validation Loss


Evaluate the result:

In [None]:
# Run the Trainer's evaluation on the eval_dataset it was constructed with and
# print whatever evaluate() returns (a metrics dict per the Trainer docs — confirm).
results = trainer.evaluate()
print(results)