In [1]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer


In [5]:
# Load the HC3 sample file, keep only the two columns used below, and
# discard any row where either of them is missing.
df = pd.read_csv("../data/hc3_sample_500.csv")[["text", "label"]].dropna()

# Preview a few rows, then show how many examples each class has.
display(df.head())
df["label"].value_counts()

Unnamed: 0,text,label
0,They are considered to be a cult . There is no...,human
1,It is not uncommon for people to have small bu...,ai
2,So the engine turns a stick that 's attached t...,human
3,It 's a push by a republican majority to gain ...,human
4,Lean manufacturing is a production philosophy ...,ai


label
human    347
ai       153
Name: count, dtype: int64

In [6]:
# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# human/ai ratio the same in both splits, and the fixed seed makes the
# split repeatable.
texts, labels = df["text"], df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

print(len(X_train), "train samples")
print(len(X_test), "test samples")

400 train samples
100 test samples


In [None]:
# TF-IDF settings:
#   - max_features: cap the vocabulary at 5000 terms
#   - ngram_range:  use single words and two-word phrases
#   - stop_words:   drop English stopwords (the, and, is etc.); they appear so
#                   frequently across both classes that they don't carry
#                   discriminative power
tfidf_settings = {
    "max_features": 5000,
    "ngram_range": (1, 2),
    "stop_words": "english",
}
vectorizer = TfidfVectorizer(**tfidf_settings)

# Learn the vocabulary and IDF weights from the training texts only, then
# apply that same fitted mapping to the held-out texts.
X_train_tdif = vectorizer.fit_transform(X_train)
X_test_tdif = vectorizer.transform(X_test)

print(X_train_tdif.shape, X_test_tdif.shape)

(400, 5000) (100, 5000)


In [8]:
# Column labels for the TF-IDF matrix: the words/phrases that became columns
feature_names = vectorizer.get_feature_names_out()

# Convert the sparse training matrix to a dense pandas DataFrame for
# exploration, one row per training document and one column per term.
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.
tfidf_df = pd.DataFrame(X_train_tdif.toarray(), columns=feature_names)

# Peek at the first few rows
tfidf_df.head()


Unnamed: 0,000,000 thing,01100001,01100001 ascii,01100001 knows,02,02 account,092,092 98,10,...,zones,zones couple,zooming,zooming rink,zubir,zubir said,zuckerberg,zuckerberg 6th,école,école polytechnique
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Total TF-IDF weight of every term, summed down the rows of tfidf_df and
# ordered from the heaviest term to the lightest.
term_totals = tfidf_df.sum(axis=0)
word_importance = term_totals.sort_values(ascending=False)

# The 20 highest-weighted terms
word_importance.iloc[:20]


like         7.521142
people       6.755591
different    5.503651
just         5.441434
time         5.385257
make         5.321671
use          5.282473
used         4.894520
does         4.611028
way          4.581602
url_0        4.272174
think        4.083747
water        3.917500
need         3.916209
really       3.774487
help         3.724806
things       3.647322
know         3.538926
work         3.456713
air          3.435008
dtype: float64

In [10]:
# Training labels re-indexed to 0..n-1 so they line up row-for-row with the
# default RangeIndex of tfidf_df.
train_labels = y_train.reset_index(drop=True)

# Labeled copy of the TF-IDF frame, kept under its original name for any
# later cell that uses it.
# NOTE: if the vocabulary contains the literal term "label", that feature
# column is overwritten by the class label in this copy.
tfidf_labeled = tfidf_df.copy()
tfidf_labeled["label"] = train_labels

# Sum TF-IDF values separately for human and ai. Grouping the untouched
# tfidf_df by the label Series (rather than by a "label" column written into
# the feature frame) keeps every term column in the totals, even a term that
# happens to be called "label".
word_importance_by_label = tfidf_df.groupby(train_labels).sum().T  # transpose so words are rows

# Top words per class
top_human = word_importance_by_label["human"].sort_values(ascending=False).head(20)
top_ai = word_importance_by_label["ai"].sort_values(ascending=False).head(20)

print("Top words in human text:\n", top_human)
print("\nTop words in AI text:\n", top_ai)


Top words in human text:
 like         6.219424
just         4.927346
people       4.355501
url_0        4.272174
time         4.190696
does         3.996111
really       3.675495
think        3.567130
know         3.150442
need         3.003175
use          3.000890
things       2.997054
way          2.958185
different    2.948256
make         2.947044
water        2.857395
actually     2.687690
used         2.640587
thing        2.635477
lot          2.582788
Name: human, dtype: float64

Top words in AI text:
 help         3.267333
different    2.555395
people       2.400090
make         2.374627
use          2.281583
used         2.253933
important    1.953564
data         1.929019
financial    1.909913
able         1.707342
way          1.623417
blood        1.547508
including    1.526187
energy       1.515292
called       1.513787
company      1.507037
small        1.500514
air          1.491941
provide      1.413117
helps        1.411010
Name: ai, dtype: float64
