In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/llm-classification/sample_submission.csv
/kaggle/input/llm-classification/train.csv
/kaggle/input/llm-classification/test.csv


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load data
train_data = pd.read_csv("/kaggle/input/llm-classification/train.csv")
test_data = pd.read_csv("/kaggle/input/llm-classification/test.csv")
sample_submission = pd.read_csv("/kaggle/input/llm-classification/sample_submission.csv")


In [5]:

train_data

Unnamed: 0,id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,30192,gpt-4-1106-preview,gpt-4-0613,"[""Is it morally right to try to have a certain...","[""The question of whether it is morally right ...","[""As an AI, I don't have personal beliefs or o...",1,0,0
1,53567,koala-13b,gpt-4-0613,"[""What is the difference between marriage lice...","[""A marriage license is a legal document that ...","[""A marriage license and a marriage certificat...",0,1,0
2,65089,gpt-3.5-turbo-0613,mistral-medium,"[""explain function calling. how would you call...","[""Function calling is the process of invoking ...","[""Function calling is the process of invoking ...",0,0,1
3,96401,llama-2-13b-chat,mistral-7b-instruct,"[""How can I create a test set for a very rare ...","[""Creating a test set for a very rare category...","[""When building a classifier for a very rare c...",1,0,0
4,198779,koala-13b,gpt-3.5-turbo-0314,"[""What is the best way to travel from Tel-Aviv...","[""The best way to travel from Tel Aviv to Jeru...","[""The best way to travel from Tel-Aviv to Jeru...",0,1,0
...,...,...,...,...,...,...,...,...,...
57472,4294656694,gpt-4-0613,claude-1,"[""A simple mnemonic for \u03c0:\n\""How I wish ...","[""Sure, let's break it down:\n\n1. \""How\"" has...","[""Here is how that mnemonic represents the dig...",1,0,0
57473,4294692063,claude-2.0,llama-2-13b-chat,"[""In python, implement a naive Bayes with gaus...","[""Here is an implementation of a naive Bayes c...","[""Sure! Here's an implementation of a naive Ba...",1,0,0
57474,4294710549,claude-1,alpaca-13b,"[""is it unethical to work on building weapons?...","[""Working on weapons technology raises some et...","[""It depends on the context. Weapons can be us...",1,0,0
57475,4294899228,palm-2,tulu-2-dpo-70b,"[""If a bait contains 0,0025% bromadiolon then ...","[""Bromadiolone is a rodenticide which is most ...","[""As an AI language model, I do not promote or...",0,1,0


In [6]:
# create "combined_text" for combined features
train_data["combined_text"] = train_data["prompt"] + " " + train_data["response_a"] + " " + train_data["response_b"]
test_data["combined_text"] = test_data["prompt"] + " " + test_data["response_a"] + " " + test_data["response_b"]

# create target context
train_data["target"] = train_data[["winner_model_a", "winner_model_b", "winner_tie"]].idxmax(axis=1)
label_encoder = LabelEncoder()
# Encode "winner_model_a", "winner_model_b", “winner_tie” to int labels
train_data["target"] = label_encoder.fit_transform(train_data["target"])

In [7]:
#Split validation sets from train data
x_train, x_val, y_train, y_val = train_test_split(
    train_data["combined_text"], train_data["target"], test_size = 0.2, random_state=42
)

In [8]:
# Vectorize text using TF-IDF
vectorizer = TfidfVectorizer(max_features = 5000)
x_train_tfidf = vectorizer.fit_transform(x_train)
x_val_tfidf = vectorizer.transform(x_val)
test_tfidf = vectorizer.transform(test_data["combined_text"])

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define DNN model
class DNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DNN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, output_dim),
            nn.Softmax(dim=1) # 3 classification
        )

    def forward(self, x):
        return self.model(x)

In [27]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

x_train_scaled = scaler.fit_transform(x_train_tfidf.toarray())
x_val_scaled = scaler.transform(x_val_tfidf.toarray())
test_scaled = scaler.transform(test_tfidf.toarray())

In [28]:
# transform data to tensor
x_train_tensor = torch.tensor(x_train_scaled, dtype = torch.float32)
x_val_tensor = torch.tensor(x_val_scaled, dtype = torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype = torch.long)
y_val_tensor = torch.tensor(y_val.to_numpy(), dtype = torch.long)

test_tensor = torch.tensor(test_scaled, dtype = torch.float32)

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)

In [15]:
# Here is the unscalered vision data
# # transform data to tensor
# x_train_tensor = torch.tensor(x_train_tfidf.toarray(), dtype = torch.float32)
# x_val_tensor = torch.tensor(x_val_tfidf.toarray(), dtype = torch.float32)
# y_train_tensor = torch.tensor(y_train.to_numpy(), dtype = torch.long)
# y_val_tensor = torch.tensor(y_val.to_numpy(), dtype = torch.long)

# test_tensor = torch.tensor(test_tfidf.toarray(), dtype = torch.float32)

# train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
# train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)


In [29]:
# train DNN
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_dim = x_train_tensor.shape[1]
output_dim = 3 # 3 classes
model = DNN(input_dim, output_dim).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-2)

for epoch in range(100):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        y_pred = model.forward(x_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}: loss: {total_loss:.4f}")

# validation
model.eval()
with torch.no_grad():
    y_val_pred = model(x_val_tensor.to(device)).argmax(dim=1).cpu().numpy()
    acc = accuracy_score(y_val_pred, y_val_tensor.numpy())
    print(f"DNN validation accuracy:{acc:.4f}")

    test_probs = model(test_tensor.to(device)).cpu().numpy()
    submission = sample_submission.copy()
    submission[["winner_model_a", "winner_model_b", "winner_tie"]] = test_probs
    submission.to_csv("submission_DNN.csv", index = False)
    print("DNN submission saved!")

Epoch 1: loss: 1568.5795
Epoch 2: loss: 1546.6425
Epoch 3: loss: 1522.9871
Epoch 4: loss: 1493.9297
Epoch 5: loss: 1457.5407
Epoch 6: loss: 1408.3172
Epoch 7: loss: 1348.8594
Epoch 8: loss: 1309.2673
Epoch 9: loss: 1278.1895
Epoch 10: loss: 1249.8742
Epoch 11: loss: 1231.8438
Epoch 12: loss: 1214.8586
Epoch 13: loss: 1199.1082
Epoch 14: loss: 1182.3484
Epoch 15: loss: 1178.1267
Epoch 16: loss: 1169.8169
Epoch 17: loss: 1157.9682
Epoch 18: loss: 1146.8519
Epoch 19: loss: 1137.1942
Epoch 20: loss: 1130.5311
Epoch 21: loss: 1121.6830
Epoch 22: loss: 1123.5311
Epoch 23: loss: 1117.3281
Epoch 24: loss: 1112.0403
Epoch 25: loss: 1099.2989
Epoch 26: loss: 1098.0708
Epoch 27: loss: 1092.9476
Epoch 28: loss: 1090.3104
Epoch 29: loss: 1084.7247
Epoch 30: loss: 1074.9704
Epoch 31: loss: 1071.6748
Epoch 32: loss: 1074.4216
Epoch 33: loss: 1070.9640
Epoch 34: loss: 1070.5187
Epoch 35: loss: 1065.0475
Epoch 36: loss: 1056.3188
Epoch 37: loss: 1055.9644
Epoch 38: loss: 1047.4531
Epoch 39: loss: 1046.

In [10]:
# # Models to train
# models = {
#     "LogisticRegression": LogisticRegression(max_iter = 1000, random_state=42),
#     "RandomForest": RandomForestClassifier(n_estimators = 100, random_state=42),
#     "SVM": SVC(probability=True, random_state=42),
# }



# # Train each model and save predictions
# for model_name, model in models.items():
#     print(f"Training {model_name} ...")
#     model.fit(x_train_tfidf.toarray(), y_train)

#     #validate
#     y_val_pred = model.predict(x_val_tfidf)
#     acc = accuracy_score(y_val, y_val_pred)
#     print(f"{model_name} validation accuracy: {acc:.4f}")

#     # predict on test data
#     test_probs = model.predict_proba(test_tfidf)

#     # create submission file
#     submission = sample_submission.copy()
#     submission[["winner_model_a", "winner_model_b", "winner_tie"]] = test_probs
#     submission.to_csv(f"submission_{model_name}.csv", index=False)
#     print(f"Submission file for {model_name} saved!")

    