## Baseline model - Naive Bayes

This notebook contains:
- Code for training a baseline model using Naive Bayes to classify texts as being either LLM-generated or human-written.

In [3]:
import pandas as pd

# Load the two corpora. The first one is later split into train/pretest sets,
# the second one is used as the test set.
train_and_pretest_data = pd.read_csv('../data/detect_ai.csv')
test_data = pd.read_csv('../data/daigt_v4.csv')

print(f"Test data size before removing rows identical to the training data: {len(test_data)}")

# Keep a single row per distinct text in the train/pretest corpus.
train_and_pretest_data = train_and_pretest_data.drop_duplicates(subset='text')

# Discard every test row whose text also appears in the train/pretest corpus,
# so that no text is shared between the two.
seen_in_training = test_data['text'].isin(train_and_pretest_data['text'])
test_data = test_data[~seen_in_training]

print(f"Train and pretest data size: {len(train_and_pretest_data)}")
print(f"Test data size: {len(test_data)}")


Test data size before removing rows identical to the training data: 73573
Train and pretest data size: 158294
Test data size: 40202


In [5]:
from sklearn.model_selection import train_test_split
from nltk import word_tokenize
import pandas as pd
# import nltk
# nltk.download('punkt') # Uncomment this line if you haven't downloaded the 'punkt' package

def preprocess_data(data):
    """
    Lower-case a text, tokenize it with NLTK's ``word_tokenize`` and glue the
    tokens back together with single spaces.

    :param data: a string
    :return: a string of space-separated tokens
    """
    return " ".join(word_tokenize(data.lower()))

def _tokenize_frame(frame, label_column):
    """
    Build a fresh ``tokens``/``label`` frame from ``frame``.

    The frame is created in one go from two lists instead of being grown row
    by row with ``pd.concat``: concatenating inside a loop re-copies every row
    collected so far on each iteration, which is quadratic in the row count.

    :param frame: a DataFrame with a 'text' column and a ``label_column`` column
    :param label_column: name of the column holding the class label
    :return: a DataFrame with columns 'tokens' and 'label' and a 0..n-1 index
    """
    return pd.DataFrame({
        'tokens': [preprocess_data(text) for text in frame['text']],
        'label': frame[label_column].to_list(),
    })


# The two source files name their label column differently.
train_pretest_tokenized_df = _tokenize_frame(train_and_pretest_data, 'generated')
test_tokenized_df = _tokenize_frame(test_data, 'label')

# Stratify on the label so both splits keep the overall class balance; the
# fixed random_state makes the split identical on every run.
train_data, pretest_data = train_test_split(
    train_pretest_tokenized_df,
    stratify=train_pretest_tokenized_df.label,
    test_size=0.2,
    random_state=42,
)

train_data.to_csv("../data/baseline_processed_train_data.csv")
pretest_data.to_csv("../data/baseline_processed_pretest_data.csv")
test_tokenized_df.to_csv("../data/baseline_processed_test_data.csv")


In [7]:
# Reload the processed splits from disk so this cell can start from the saved
# files instead of re-running the tokenization.
train_data = pd.read_csv("../data/baseline_processed_train_data.csv")
pretest_data = pd.read_csv("../data/baseline_processed_pretest_data.csv")
test_data = pd.read_csv("../data/baseline_processed_test_data.csv")

print(f"Number of entries in the training data: {train_data.shape[0]}")
# This count is for the pretest split, not for test_data, so it is labelled as such.
print(f"Number of entries in the pretest data: {pretest_data.shape[0]}")


def _label_share(frame, column, value):
    """Return the fraction of rows in ``frame`` whose ``column`` equals ``value``."""
    return frame[frame[column] == value].shape[0] / frame.shape[0]


# The raw corpus calls its label column 'generated'; the processed splits call it 'label'.
data_label_0_proportion = _label_share(train_and_pretest_data, 'generated', 0)
data_label_1_proportion = _label_share(train_and_pretest_data, 'generated', 1)

# NOTE: the variable names below keep their original spelling so that any
# other cell referring to them keeps working.
train_data_label_0_propotion = _label_share(train_data, 'label', 0)
train_data_label_1_propotion = _label_share(train_data, 'label', 1)

pretest_data_label_0_propotion = _label_share(pretest_data, 'label', 0)
pretest_data_label_1_propotion = _label_share(pretest_data, 'label', 1)

print("\n\nProportion of the data:")
print(f"\n{'':<20s} {'Data overall':<20s} {'Train data':<20s} {'Pretest data':<20s}")
print(f"{'Human written':<20s} {data_label_0_proportion:<20.4f} {train_data_label_0_propotion:<20.4f} {pretest_data_label_0_propotion:<20.4f}")
print(f"{'LLM generated':<20s} {data_label_1_proportion:<20.4f} {train_data_label_1_propotion:<20.4f} {pretest_data_label_1_propotion:<20.4f}")


Number of entries in the training data: 126635
Number of entries in the test data: 31659


Proportion of the data:

                     Data overall         Train data           Test data           
Human written        0.2288               0.2288               0.2288              
LLM generated        0.7712               0.7712               0.7712              


In [8]:
def get_features(text):
    """
    Word-presence feature extractor, based on Kochmar, 2022, p. 171.

    The text is split on single spaces and every resulting piece, lower-cased,
    becomes a key mapped to ``True``.

    :param text: a string
    :return: a dictionary of features
    """
    return {token.lower(): True for token in text.split(' ')}

def _to_featuresets(frame):
    """
    Turn a processed split into a list of ``(features, label)`` pairs.

    The two columns are zipped directly rather than iterating with
    ``iterrows``, which builds a Series object for every row.

    :param frame: a DataFrame with 'tokens' and 'label' columns
    :return: a list of (feature dictionary, label) tuples, in row order
    """
    return [
        (get_features(tokens), label)
        for tokens, label in zip(frame['tokens'], frame['label'])
    ]


train_features = _to_featuresets(train_data)
pretest_features = _to_featuresets(pretest_data)
test_features = _to_featuresets(test_data)

print(f"Number of entries in the features of the training data: {len(train_features)}")
# This count is for the pretest split, not for test_features, so it is labelled as such.
print(f"Number of entries in the features of the pretest data: {len(pretest_features)}")

print(train_features[0])




Number of entries in the features of the training data: 126635
Number of entries in the features of the test data: 31659
({'in': True, 'the': True, 'vast': True, 'and': True, 'treacherous': True, 'realm': True, 'of': True, 'underworld': True, ',': True, 'i': True, 'sindra': True, 'reigned': True, 'supreme': True, 'as': True, 'an': True, 'end-game': True, 'level': True, '50': True, 'god': True, '.': True, 'with': True, 'my': True, 'mastery': True, 'over': True, 'dark': True, 'magic': True, 'unparalleled': True, 'strength': True, 'was': True, 'feared': True, 'by': True, 'all': True, 'who': True, 'dared': True, 'to': True, 'challenge': True, 'me': True, 'however': True, 'position': True, 'power': True, 'would': True, 'soon': True, 'be': True, 'put': True, 'test': True, 'most': True, 'unexpected': True, 'manner': True, 'one': True, 'gloomy': True, 'day': True, 'strolled': True, 'through': True, 'desolate': True, 'plains': True, 'a': True, 'peculiar': True, 'sight': True, 'caught': True, 'a

In [9]:
from nltk import NaiveBayesClassifier

# Fit NLTK's Naive Bayes classifier on the (feature dictionary, label) pairs
# built from the training split; the pretest and test pairs are not used here.
classifier = NaiveBayesClassifier.train(train_features)

In [11]:
from sklearn.metrics import f1_score

# Gold labels of each split, in the same order as the featuresets.
labels_train = [gold for _, gold in train_features]
labels_pretest = [gold for _, gold in pretest_features]
labels_test = [gold for _, gold in test_features]

# Classifier predictions for each split, one featureset at a time.
predicted_labels_train = [classifier.classify(features) for features, _ in train_features]
predicted_labels_pretest = [classifier.classify(features) for features, _ in pretest_features]
predicted_labels_test = [classifier.classify(features) for features, _ in test_features]

# F1 score per split, computed with scikit-learn's default settings.
f1_train = f1_score(labels_train, predicted_labels_train)
f1_pretest = f1_score(labels_pretest, predicted_labels_pretest)
f1_test = f1_score(labels_test, predicted_labels_test)

print(f"F1 score on the training data: {f1_train}")
print(f"F1 score on the pretest data: {f1_pretest}")
print(f"F1 score on the test data: {f1_test}")

F1 score on the training data: 0.7527611518213968
F1 score on the pretest data: 0.7429676076234851
F1 score on the test data: 0.9588292238483002
