In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the Kaggle input directory,
# then print them one per line in the order os.walk yields them.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/llm-classification/sample_submission.csv
/kaggle/input/llm-classification/train.csv
/kaggle/input/llm-classification/test.csv


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Read the three competition CSVs: training rows, test rows, and the
# sample submission that serves as the output template.
csv_paths = (
    "/kaggle/input/llm-classification/train.csv",
    "/kaggle/input/llm-classification/test.csv",
    "/kaggle/input/llm-classification/sample_submission.csv",
)
train_data, test_data, sample_submission = map(pd.read_csv, csv_paths)


In [6]:

# Display the training frame as the cell result; the rendered output elides the middle rows.
train_data

Unnamed: 0,id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,30192,gpt-4-1106-preview,gpt-4-0613,"[""Is it morally right to try to have a certain...","[""The question of whether it is morally right ...","[""As an AI, I don't have personal beliefs or o...",1,0,0
1,53567,koala-13b,gpt-4-0613,"[""What is the difference between marriage lice...","[""A marriage license is a legal document that ...","[""A marriage license and a marriage certificat...",0,1,0
2,65089,gpt-3.5-turbo-0613,mistral-medium,"[""explain function calling. how would you call...","[""Function calling is the process of invoking ...","[""Function calling is the process of invoking ...",0,0,1
3,96401,llama-2-13b-chat,mistral-7b-instruct,"[""How can I create a test set for a very rare ...","[""Creating a test set for a very rare category...","[""When building a classifier for a very rare c...",1,0,0
4,198779,koala-13b,gpt-3.5-turbo-0314,"[""What is the best way to travel from Tel-Aviv...","[""The best way to travel from Tel Aviv to Jeru...","[""The best way to travel from Tel-Aviv to Jeru...",0,1,0
...,...,...,...,...,...,...,...,...,...
57472,4294656694,gpt-4-0613,claude-1,"[""A simple mnemonic for \u03c0:\n\""How I wish ...","[""Sure, let's break it down:\n\n1. \""How\"" has...","[""Here is how that mnemonic represents the dig...",1,0,0
57473,4294692063,claude-2.0,llama-2-13b-chat,"[""In python, implement a naive Bayes with gaus...","[""Here is an implementation of a naive Bayes c...","[""Sure! Here's an implementation of a naive Ba...",1,0,0
57474,4294710549,claude-1,alpaca-13b,"[""is it unethical to work on building weapons?...","[""Working on weapons technology raises some et...","[""It depends on the context. Weapons can be us...",1,0,0
57475,4294899228,palm-2,tulu-2-dpo-70b,"[""If a bait contains 0,0025% bromadiolon then ...","[""Bromadiolone is a rodenticide which is most ...","[""As an AI language model, I do not promote or...",0,1,0


In [9]:
# Build the single text feature ("combined_text") and the integer target.
TEXT_COLUMNS = ["prompt", "response_a", "response_b"]
WINNER_COLUMNS = ["winner_model_a", "winner_model_b", "winner_tie"]


def _combine_text(frame):
    """Join prompt, response_a and response_b into one space-separated string per row.

    Missing values are replaced with empty strings first. Without this,
    ``str + NaN`` yields NaN for the whole row, and TfidfVectorizer raises on
    NaN documents.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns listed in ``TEXT_COLUMNS``.

    Returns
    -------
    pandas.Series
        One combined string per row, indexed like ``frame``.
    """
    parts = frame[TEXT_COLUMNS].fillna("").astype(str)
    return parts["prompt"] + " " + parts["response_a"] + " " + parts["response_b"]


# Same transformation for train and test so the vectorizer sees identical formatting.
train_data["combined_text"] = _combine_text(train_data)
test_data["combined_text"] = _combine_text(test_data)

# Collapse the three one-hot winner flags into the name of the winning column.
train_data["target"] = train_data[WINNER_COLUMNS].idxmax(axis=1)
label_encoder = LabelEncoder()
# Encode the winner column names as integer labels. LabelEncoder assigns codes
# in sorted order of the class names, so when all three outcomes are present:
# "winner_model_a" -> 0, "winner_model_b" -> 1, "winner_tie" -> 2.
train_data["target"] = label_encoder.fit_transform(train_data["target"])

In [13]:
# Hold out 20% of the training rows for validation; the fixed seed keeps the split repeatable.
texts = train_data["combined_text"]
labels = train_data["target"]
x_train, x_val, y_train, y_val = train_test_split(texts, labels, test_size=0.2, random_state=42)

In [25]:
# TF-IDF features capped at 5000 terms. The vocabulary and IDF weights are
# learned from the training split only, then reused for validation and test.
vectorizer = TfidfVectorizer(max_features=5000)
x_train_tfidf = vectorizer.fit_transform(x_train)
x_val_tfidf, test_tfidf = (
    vectorizer.transform(documents)
    for documents in (x_val, test_data["combined_text"])
)

In [None]:
# Candidate classifiers, keyed by the name used in log lines and submission file names.
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000, random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    # NOTE(review): a kernel SVC with probability=True is documented to scale at
    # least quadratically with the number of samples and runs an internal
    # cross-validation for the probability calibration; expect this entry to be
    # very slow on tens of thousands of rows.
    "SVM": SVC(probability=True, random_state=42),
}

# Train each model, report validation accuracy, and write one submission file per model.
for model_name, model in models.items():
    print(f"Training {model_name} ...")
    # Fit on the sparse TF-IDF matrix directly. Calling .toarray() here would
    # allocate a dense (n_samples x n_features) array for no benefit, and an SVC
    # fitted on dense data raises ValueError when later given the sparse
    # validation/test matrices used below.
    model.fit(x_train_tfidf, y_train)

    # Validate on the held-out split.
    y_val_pred = model.predict(x_val_tfidf)
    acc = accuracy_score(y_val, y_val_pred)
    print(f"{model_name} validation accuracy: {acc:.4f}")

    # Predict class probabilities for the test data.
    test_probs = model.predict_proba(test_tfidf)

    # predict_proba columns are ordered like model.classes_ (encoded labels).
    # Decode them back to the winner column names so every probability lands in
    # the matching submission column regardless of the encoder's ordering.
    proba_columns = label_encoder.inverse_transform(model.classes_).tolist()

    # Create the submission file from the sample template.
    # NOTE(review): assumes sample_submission rows are in the same order as test_data — confirm.
    submission = sample_submission.copy()
    submission[proba_columns] = test_probs
    submission.to_csv(f"submission_{model_name}.csv", index=False)
    print(f"Submission file for {model_name} saved!")

    

Training LogisticRegression ...
LogisticRegression validation accuracy: 0.3703
Submission file for LogisticRegression saved!
Training RandomForest ...
RandomForest validation accuracy: 0.3889
Submission file for RandomForest saved!
Training SVM ...
