# Benchmark of existing approaches for detecting machine-generated text

## Solaiman

_Irene Solaiman, Miles Brundage, Jack Clark, Amanda Askell, Ariel Herbert-Voss, Jeff Wu, Alec Radford,
Gretchen Krueger, Jong Wook Kim, Sarah Kreps, Miles McCain, Alex Newhouse, Jason Blazakis, Kris McGuffie, and Jasmine Wang. 2019. Release strategies and the social impacts of language models._

In [89]:
import os
import json
import numpy as np
from scipy import sparse

from sklearn.model_selection import PredefinedSplit, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

import pickle

In [90]:
def _load_split(data_dir, source, split, n=np.inf):
    path = os.path.join(data_dir, f'{source}.{split}.jsonl')
    texts = []
    for i, line in enumerate(open(path)):
        if i >= n:
            break
        texts.append(json.loads(line)['text'])
    return texts

def load_split(data_dir, source, webtext_source, split, n=np.inf):
    webtext = _load_split(data_dir, webtext_source, split, n=n//2)
    gen = _load_split(data_dir, source, split, n=n//2)
    texts = webtext+gen
    labels = [0]*len(webtext)+[1]*len(gen)
    return texts, labels


def load_single_split(data_dir, source, split, machine_generated=True, n=np.inf):
    data = _load_split(data_dir, source, split, n)
    if machine_generated:
        labels = [1]*len(data)
    else:
        labels = [0]*len(data)
    return data, labels

In [91]:
data_dir = './../../get_text_detect_space/datasets/GPT2vsWebText/'
log_dir = './results/solaiman' 
source='xl-1542M-clean'
webtext_source = 'webtext-clean'
n_train=200000
n_valid=10000
n_jobs=-1
verbose=False

In [92]:
train_texts, train_labels = load_split(data_dir, source, webtext_source, 'train', n=n_train)
valid_texts, valid_labels = load_split(data_dir, source, webtext_source, 'valid', n=n_valid)
test_texts, test_labels = load_split(data_dir, source, webtext_source, 'test')

In [93]:
len(train_texts)

200000

In [94]:
len(test_texts)

10000

In [95]:
vect = TfidfVectorizer(ngram_range=(1, 2), min_df=5, max_features=2**21)
train_features = vect.fit_transform(train_texts)

In [96]:
valid_features = vect.transform(valid_texts)
test_features = vect.transform(test_texts)

In [97]:
model = LogisticRegression(solver='liblinear')

skip the following grid search grid and use the default params

In [147]:
params = {'C': [1/64, 1/32, 1/16, 1/8, 1/4, 1/2, 1, 2, 4, 8, 16, 32, 64]}
# params = {'C': [1/8, 1, 8]}
split = PredefinedSplit([-1]*n_train+[0]*n_valid)
search = GridSearchCV(model, params, cv=split, n_jobs=n_jobs, verbose=verbose, refit=False)
search.fit(sparse.vstack([train_features, valid_features]), train_labels+valid_labels)
model = model.set_params(**search.best_params_)

In [99]:
model.fit(train_features, train_labels)

In [100]:
search.best_params_

{'C': 16}

In [101]:
filename = 'solaiman_logreg_linear_xl-1542M-clean_webtext-clean.sav'
pickle.dump(model, open(filename, 'wb'))

In [102]:
valid_accuracy = model.score(valid_features, valid_labels)*100.
test_accuracy = model.score(test_features, test_labels)*100.

In [103]:
data = {
    'source':source,
    'n_train':n_train,
    'valid_accuracy':valid_accuracy,
    'test_accuracy':test_accuracy
}
print(data)

{'source': 'xl-1542M-clean', 'n_train': 200000, 'valid_accuracy': 70.05, 'test_accuracy': 70.55}


In [104]:
json.dump(data, open(os.path.join(log_dir, f'{source}.json'), 'w'))

Only machine-generated

In [105]:
source

'xl-1542M-clean'

In [106]:
test_mg_texts, test_mg_labels = load_single_split(data_dir, source, 'test',machine_generated=True)
test_mg_features = vect.transform(test_mg_texts)

In [107]:
test_accuracy = model.score(test_mg_features, test_mg_labels)*100.

In [108]:
test_accuracy

70.02000000000001

In [109]:
webtext_source

'webtext-clean'

In [110]:
test_hw_texts, test_hw_labels = load_single_split(data_dir, webtext_source, 'test', machine_generated=False)
test_hw_features = vect.transform(test_hw_texts)
test_accuracy = model.score(test_hw_features, test_hw_labels)*100.
test_accuracy

71.08

## Other datasets

### Auxiliary functions

In [157]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

def create_dataset(raw_data, is_machine_generated=True):
    X = np.array(raw_data)
    if is_machine_generated:
        y = np.ones_like(X, dtype=int)
    else:
        y = np.zeros_like(X, dtype=int)
    return pd.DataFrame(data={'X':X, 'y':y})

### GPT-3 Machine generated text

In [158]:
gpt3_data_path = "./../../get_text_detect_space/datasets/GPT3"
gpt3_ds_mg_filename = '175b_samples.jsonl'
with open(os.path.join(gpt3_data_path, gpt3_ds_mg_filename), 'r') as file:
    gpt3_ds_mg = [x for x in file.readlines() if x is not None and x != "\n"]
    

In [159]:
gpt3_mg_pd = create_dataset(gpt3_ds_mg, is_machine_generated=True)

In [160]:
gpt3_mg_pd

Unnamed: 0,X,y
0,"""Glacier Ridge Christian School\n\nGlacier Rid...",1
1,"""His father was a professor and his mother was...",1
2,"""Image caption Bailiffs removed protesters fro...",1
3,"""Contents\n\n1. The Meaning of Love\n\n2. Our ...",1
4,"""Interviews\n\nInterview with Ariel Efraim (So...",1
...,...,...
480,"""Get Rangers updates directly to your inbox Su...",1
481,"""Posted 01 September 2010 - 07:51 PM\n\nUsing ...",1
482,"""Jeff Chapman\n\nJeffrey Scott Chapman (born M...",1
483,"""You can look into the Issue Tracker and repor...",1


In [161]:
def get_features_and_labels_for_gpt3(dataset, vectorizer):
    gpt3_test_text = [i[1:-2] for i in dataset['X']]
    gpt3_test_labels = dataset['y'].tolist()
    gpt3_test_features = vectorizer.transform(gpt3_test_text)
    return (gpt3_test_features, gpt3_test_labels, gpt3_test_text)

In [162]:
g3_mg_feat, g3_mg_labels, _ = get_features_and_labels_for_gpt3(
    gpt3_mg_pd, vect)

In [163]:
gpt3_mg_accuracy = model.score(g3_mg_feat, g3_mg_labels)*100.

In [164]:
gpt3_mg_accuracy

15.257731958762887

In [183]:
model

### GROVER dataset p=0.94

In [165]:
grover_data_path = "./../../get_text_detect_space/datasets/GROVER/"
grover_datasets = sorted([f for f in os.listdir(grover_data_path) if os.path.isfile(os.path.join(grover_data_path, f))])

In [166]:
grover_datasets

['generator=base~dataset=p0.90.jsonl',
 'generator=base~dataset=p0.92.jsonl',
 'generator=base~dataset=p0.94.jsonl',
 'generator=base~dataset=p0.96.jsonl',
 'generator=base~dataset=p0.98.jsonl',
 'generator=base~dataset=p1.00.jsonl',
 'generator=medium~dataset=p0.90.jsonl',
 'generator=medium~dataset=p0.92.jsonl',
 'generator=medium~dataset=p0.94.jsonl',
 'generator=medium~dataset=p0.96.jsonl',
 'generator=medium~dataset=p0.98.jsonl',
 'generator=medium~dataset=p1.00.jsonl',
 'generator=mega~dataset=p0.90.jsonl',
 'generator=mega~dataset=p0.92.jsonl',
 'generator=mega~dataset=p0.94.jsonl',
 'generator=mega~dataset=p0.96.jsonl',
 'generator=mega~dataset=p0.98.jsonl',
 'generator=mega~dataset=p1.00.jsonl']

In [167]:
def get_features_and_labels_for_grover(dataset, vectorizer):
    test_text = [i for i in dataset['X']]
    test_labels = dataset['y'].tolist()
    test_features = vectorizer.transform(test_text)
    return (test_features, test_labels, test_text)

#### dataset mega

In [168]:
grover_ds_filename = 'generator=mega~dataset=p0.94.jsonl'
grover_ds = pd.read_json(os.path.join(grover_data_path, grover_ds_filename), lines = True)
grover_ds_mg = grover_ds[(grover_ds['label'] == 'machine')]
grover_ds_hw = grover_ds[(grover_ds['label'] == 'human')]
grover_mg_pd = create_dataset(grover_ds_mg["article"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["article"], is_machine_generated=False)

Test on GROVER human-written sentences
Same dataset as p=0.94; same accuracy value expected

In [169]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [170]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

Machine-generated

In [171]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [172]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

18.39

#### dataset medium

In [173]:
grover_ds_filename = 'generator=medium~dataset=p0.94.jsonl'
grover_ds = pd.read_json(os.path.join(grover_data_path, grover_ds_filename), lines = True)
grover_ds_mg = grover_ds[(grover_ds['label'] == 'machine')]
grover_ds_hw = grover_ds[(grover_ds['label'] == 'human')]
grover_mg_pd = create_dataset(grover_ds_mg["article"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["article"], is_machine_generated=False)

human-written

In [174]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [175]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

machine-generated

In [176]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [177]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

19.88

#### dataset base

In [178]:
grover_ds_filename = 'generator=base~dataset=p0.94.jsonl'
grover_ds = pd.read_json(os.path.join(grover_data_path, grover_ds_filename), lines = True)
grover_ds_mg = grover_ds[(grover_ds['label'] == 'machine')]
grover_ds_hw = grover_ds[(grover_ds['label'] == 'human')]
grover_mg_pd = create_dataset(grover_ds_mg["article"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["article"], is_machine_generated=False)

human-written

In [179]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [180]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

machine-generated

In [181]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [182]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

21.65

## No cleaned dataset

In [145]:
source='xl-1542M'
webtext_source='webtext'
train_texts, train_labels = load_split(data_dir, source, webtext_source, 'train', n=n_train)
valid_texts, valid_labels = load_split(data_dir, source, webtext_source, 'valid', n=n_valid)
test_texts, test_labels = load_split(data_dir, source, webtext_source, 'test')

In [146]:
vect = TfidfVectorizer(ngram_range=(1, 2), min_df=5, max_features=2**21)
train_features = vect.fit_transform(train_texts)
valid_features = vect.transform(valid_texts)
test_features = vect.transform(test_texts)

In [148]:
model.fit(train_features, train_labels)

In [149]:
filename = 'solaiman_logreg_xl-1542M_notcleaned.sav'
pickle.dump(model, open(filename, 'wb'))

In [150]:
valid_accuracy = model.score(valid_features, valid_labels)*100.
test_accuracy = model.score(test_features, test_labels)*100.

In [151]:
data = {
    'source':source,
    'n_train':n_train,
    'valid_accuracy':valid_accuracy,
    'test_accuracy':test_accuracy
}

In [152]:
data

{'source': 'xl-1542M',
 'n_train': 200000,
 'valid_accuracy': 70.55,
 'test_accuracy': 71.04}

In [144]:
json.dump(data, open(os.path.join(log_dir, f'{source}.json'), 'w'))

In [154]:
source

'xl-1542M'

In [153]:
test_mg_texts, test_mg_labels = load_single_split(data_dir, source, 'test',machine_generated=True)
test_mg_features = vect.transform(test_mg_texts)
test_accuracy = model.score(test_mg_features, test_mg_labels)*100.
test_accuracy

68.60000000000001

In [155]:
webtext_source

'webtext'

In [156]:
test_hw_texts, test_hw_labels = load_single_split(data_dir, webtext_source, 'test', machine_generated=False)
test_hw_features = vect.transform(test_hw_texts)
test_accuracy = model.score(test_hw_features, test_hw_labels)*100.
test_accuracy

73.48