# Benchmark of existing approaches for detecting machine-generated text

This notebook enables to evaluate the logistic regression-based text classifier by Solaiman et al.

## Solaiman: Logistic Regression

_Irene Solaiman, Miles Brundage, Jack Clark, Amanda Askell, Ariel Herbert-Voss, Jeff Wu, Alec Radford,
Gretchen Krueger, Jong Wook Kim, Sarah Kreps, Miles McCain, Alex Newhouse, Jason Blazakis, Kris McGuffie, and Jasmine Wang. 2019. Release strategies and the social impacts of language models._

Implementation of the text classifier based on logistic regression.

Logic extracted form `baseline.py` by Solaiman et al.: [https://github.com/openai/gpt-2-output-dataset/tree/master](https://github.com/openai/gpt-2-output-dataset/tree/master)

### Install dependencies
To run the code in this notebook, you must install the required dependencies ([https://github.com/HendrikStrobelt/detecting-fake-text/blob/master/requirements.txt](detecting-fake-text/requirements.txt)). 

See the `gtc_solaiman_RoBERTa.ipynb` notebook for further information.


## Evaluation (with downsampling)

This section is devoted to evaluating the Solaiman et al. model under different datasets. 

Here, we consider **pre-processed** dataset, with downsampling. We aim to evalute the Solaiman et al. technique on a dataset with limited noise. 

### Training

Definition of imports and auxiliary functions

In [1]:
import os
import json
import numpy as np
from scipy import sparse

from sklearn.model_selection import PredefinedSplit, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

import pickle

In [2]:
def _load_split(data_dir, source, n=np.inf):
    path = os.path.join(data_dir, source)
    texts = []
    for i, line in enumerate(open(path)):
        if i >= n:
            break
        texts.append(json.loads(line)['text'])
    return texts

def load_split(data_dir, source, webtext_source, n=np.inf):
    webtext = _load_split(data_dir, webtext_source, n=n//2)
    gen = _load_split(data_dir, source, n=n//2)
    texts = webtext+gen
    labels = [0]*len(webtext)+[1]*len(gen)
    return texts, labels


def load_single_split(data_dir, source, machine_generated=True, n=np.inf):
    data = _load_split(data_dir, source, n)
    if machine_generated:
        labels = [1]*len(data)
    else:
        labels = [0]*len(data)
    return data, labels

In [3]:
model = LogisticRegression(solver='liblinear')

Train the classifier using subsets of the GPT2vsWebText dataset

In [4]:
n_train=200000
n_valid=10000
n_jobs=-1
verbose=False

In [6]:
train_texts, train_labels = load_split('./data/training/', 'GPT2-xl-1542M-k40.train.filtered.jsonl', 'webtext.train.filtered.jsonl', n=n_train)
valid_texts, valid_labels = load_split('./data/validation/', 'GPT2-xl-1542M-k40.valid.machine.jsonl', 'webtext.valid.human.jsonl', n=n_valid)
test_texts, test_labels = load_split('./data/test/', 'GPT2-xl-1542M-k40.test.machine.jsonl', 'webtext.test.human.jsonl')

In [7]:
len(train_texts)

200000

In [8]:
len(test_texts)

10000

In [9]:
vect = TfidfVectorizer(ngram_range=(1, 2), min_df=5, max_features=2**21)
train_features = vect.fit_transform(train_texts)

In [10]:
valid_features = vect.transform(valid_texts)
test_features = vect.transform(test_texts)

skip the following grid search grid and use the default params

In [11]:
params = {'C': [1/64, 1/32, 1/16, 1/8, 1/4, 1/2, 1, 2, 4, 8, 16, 32, 64]}
# params = {'C': [8, 16]}
split = PredefinedSplit([-1]*n_train+[0]*n_valid)
search = GridSearchCV(model, params, cv=split, n_jobs=n_jobs, verbose=verbose, refit=False)
search.fit(sparse.vstack([train_features, valid_features]), train_labels+valid_labels)
model = model.set_params(**search.best_params_)

In [12]:
model.fit(train_features, train_labels)

In [13]:
search.best_params_

{'C': 16}

In [15]:
valid_accuracy = model.score(valid_features, valid_labels)*100.
test_accuracy = model.score(test_features, test_labels)*100.

In [16]:
valid_accuracy

91.27

In [17]:
test_accuracy

### Experiments on WebText + GPT2-xl-1542M data

In [18]:
data_dir = './data/test/'
source='GPT2-xl-1542M-k40.test.machine.jsonl'
webtext_source='webtext.test.human.jsonl'

In [19]:
test_mg_texts, test_mg_labels = load_single_split(data_dir, source, machine_generated=True)
test_mg_features = vect.transform(test_mg_texts)

In [20]:
test_accuracy = model.score(test_mg_features, test_mg_labels)*100.

In [21]:
test_accuracy

89.74

In [22]:
webtext_source

'webtext.test.human.jsonl'

In [23]:
test_hw_texts, test_hw_labels = load_single_split(data_dir, webtext_source, machine_generated=False)
test_hw_features = vect.transform(test_hw_texts)
test_accuracy = model.score(test_hw_features, test_hw_labels)*100.
test_accuracy

91.58

### Experiments on GPT3-175b Machine generated text

In [157]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

def create_dataset(raw_data, is_machine_generated=True):
    X = np.array(raw_data)
    if is_machine_generated:
        y = np.ones_like(X, dtype=int)
    else:
        y = np.zeros_like(X, dtype=int)
    return pd.DataFrame(data={'X':X, 'y':y})

In [158]:
gpt3_data_path = "./data/test/"
gpt3_ds_mg_filename = 'GPT3-175b.test.machine.jsonl'
with open(os.path.join(gpt3_data_path, gpt3_ds_mg_filename), 'r') as file:
    gpt3_ds_mg = [x for x in file.readlines() if x is not None and x != "\n"]
    

In [159]:
gpt3_mg_pd = create_dataset(gpt3_ds_mg, is_machine_generated=True)

In [161]:
def get_features_and_labels_for_gpt3(dataset, vectorizer):
    gpt3_test_text = [i[1:-2] for i in dataset['X']]
    gpt3_test_labels = dataset['y'].tolist()
    gpt3_test_features = vectorizer.transform(gpt3_test_text)
    return (gpt3_test_features, gpt3_test_labels, gpt3_test_text)

In [162]:
g3_mg_feat, g3_mg_labels, _ = get_features_and_labels_for_gpt3(
    gpt3_mg_pd, vect)

In [163]:
gpt3_mg_accuracy = model.score(g3_mg_feat, g3_mg_labels)*100.

In [164]:
gpt3_mg_accuracy

15.257731958762887

In [160]:
gpt3_mg_pd

Unnamed: 0,X,y
0,"""Glacier Ridge Christian School\n\nGlacier Rid...",1
1,"""His father was a professor and his mother was...",1
2,"""Image caption Bailiffs removed protesters fro...",1
3,"""Contents\n\n1. The Meaning of Love\n\n2. Our ...",1
4,"""Interviews\n\nInterview with Ariel Efraim (So...",1
...,...,...
480,"""Get Rangers updates directly to your inbox Su...",1
481,"""Posted 01 September 2010 - 07:51 PM\n\nUsing ...",1
482,"""Jeff Chapman\n\nJeffrey Scott Chapman (born M...",1
483,"""You can look into the Issue Tracker and repor...",1


In [183]:
model

### Experiments on GROVER dataset p=0.94

In [165]:
grover_data_path = "./data/test/"

In [167]:
def get_features_and_labels_for_grover(dataset, vectorizer):
    test_text = [i for i in dataset['X']]
    test_labels = dataset['y'].tolist()
    test_features = vectorizer.transform(test_text)
    return (test_features, test_labels, test_text)

#### dataset mega

In [168]:
grover_ds_mg_filename = 'Grover-mega-p0.94.test.machine.jsonl'
grover_ds_hw_filename = 'Grover.human.jsonl'
grover_ds_mg = pd.read_json(os.path.join(grover_data_path, grover_ds_mg_filename), lines = True)
grover_ds_hw = pd.read_json(os.path.join(grover_data_path, grover_ds_hw_filename), lines = True)
grover_mg_pd = create_dataset(grover_ds_mg["text"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["text"], is_machine_generated=False)

Test on GROVER human-written sentences
Same dataset as p=0.94; same accuracy value expected

In [169]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [170]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

Machine-generated

In [171]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [172]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

18.39

#### dataset medium

In [173]:
grover_ds_mg_filename = 'Grover-medium-p0.94.test.machine.jsonl'
grover_ds_hw_filename = 'Grover.human.jsonl'
grover_ds_mg = pd.read_json(os.path.join(grover_data_path, grover_ds_mg_filename), lines = True)
grover_ds_hw = pd.read_json(os.path.join(grover_data_path, grover_ds_hw_filename), lines = True)
grover_mg_pd = create_dataset(grover_ds_mg["text"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["text"], is_machine_generated=False)

human-written

In [174]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [175]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

machine-generated

In [176]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [177]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

19.88

#### dataset base

In [178]:
grover_ds_mg_filename = 'Grover-base-p0.94.test.machine.jsonl'
grover_ds_hw_filename = 'Grover.human.jsonl'
grover_ds_mg = pd.read_json(os.path.join(grover_data_path, grover_ds_mg_filename), lines = True)
grover_ds_hw = pd.read_json(os.path.join(grover_data_path, grover_ds_hw_filename), lines = True)
grover_mg_pd = create_dataset(grover_ds_mg["text"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["text"], is_machine_generated=False)

human-written

In [179]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [180]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

machine-generated

In [181]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [182]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

21.65

## Evaluation (no downsampling)

This section is devoted to evaluating the Solaiman et al. model under different datasets. 

Here, we consider **raw** dataset, with no downsampling.

### Training

In [None]:
train_texts, train_labels = load_split('./data/training/', 'GPT2-xl-1542M.train.filtered.jsonl', 'webtext.train.filtered.jsonl', n=n_train)
valid_texts, valid_labels = load_split('./data/validation/', 'GPT2-xl-1542M.valid.machine.jsonl', 'webtext.valid.machine.jsonl', n=n_valid)
test_texts, test_labels = load_split('./data/test/', 'GPT2-xl-1542M.test.machine.jsonl', 'webtext.test.human.jsonl')

In [146]:
vect = TfidfVectorizer(ngram_range=(1, 2), min_df=5, max_features=2**21)
train_features = vect.fit_transform(train_texts)
valid_features = vect.transform(valid_texts)
test_features = vect.transform(test_texts)

In [148]:
model.fit(train_features, train_labels)

In [150]:
valid_accuracy = model.score(valid_features, valid_labels)*100.
test_accuracy = model.score(test_features, test_labels)*100.

### Experiments on WebText + GPT2-xl-1542M data

In [8]:
data_dir = './data/test/'
source='GPT2-xl-1542M.test.machine.jsonl'
webtext_source='webtext.test.human.jsonl'

In [7]:
source

'GPT2-xl-1542M.test.machine.jsonl'

In [153]:
test_mg_texts, test_mg_labels = load_single_split(data_dir, source, machine_generated=True)
test_mg_features = vect.transform(test_mg_texts)
test_accuracy = model.score(test_mg_features, test_mg_labels)*100.
test_accuracy

68.60000000000001

In [155]:
webtext_source

'webtext'

In [156]:
test_hw_texts, test_hw_labels = load_single_split(data_dir, webtext_source, machine_generated=False)
test_hw_features = vect.transform(test_hw_texts)
test_accuracy = model.score(test_hw_features, test_hw_labels)*100.
test_accuracy

73.48

### Experiments on GPT3-175b Machine generated text

In [None]:
gpt3_data_path = "./data/test/"
gpt3_ds_mg_filename = 'GPT3-175b.test.machine.jsonl'
with open(os.path.join(gpt3_data_path, gpt3_ds_mg_filename), 'r') as file:
    gpt3_ds_mg = [x for x in file.readlines() if x is not None and x != "\n"]
gpt3_mg_pd = create_dataset(gpt3_ds_mg, is_machine_generated=True)

In [None]:
g3_mg_feat, g3_mg_labels, _ = get_features_and_labels_for_gpt3(gpt3_mg_pd, vect)

In [None]:
gpt3_mg_accuracy = model.score(g3_mg_feat, g3_mg_labels)*100.

In [None]:
gpt3_mg_accuracy

### Experiments on GROVER dataset p=0.94

#### dataset mega

In [None]:
grover_data_path = "./data/test/"

grover_ds_mg_filename = 'Grover-mega-p0.94.test.machine.jsonl'
grover_ds_hw_filename = 'Grover.human.jsonl'
grover_ds_mg = pd.read_json(os.path.join(grover_data_path, grover_ds_mg_filename), lines = True)
grover_ds_hw = pd.read_json(os.path.join(grover_data_path, grover_ds_hw_filename), lines = True)
grover_mg_pd = create_dataset(grover_ds_mg["text"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["text"], is_machine_generated=False)

Test on GROVER human-written sentences
Same dataset as p=0.94; same accuracy value expected

In [None]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [170]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

Machine-generated

In [171]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [172]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

18.39

#### dataset medium

In [None]:
grover_ds_mg_filename = 'Grover-medium-p0.94.test.machine.jsonl'
grover_ds_hw_filename = 'Grover.human.jsonl'
grover_ds_mg = pd.read_json(os.path.join(grover_data_path, grover_ds_mg_filename), lines = True)
grover_ds_hw = pd.read_json(os.path.join(grover_data_path, grover_ds_hw_filename), lines = True)
grover_mg_pd = create_dataset(grover_ds_mg["text"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["text"], is_machine_generated=False)

human-written

In [None]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)


In [175]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

machine-generated

In [None]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [177]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

19.88

#### dataset base

In [None]:
grover_ds_mg_filename = 'Grover-base-p0.94.test.machine.jsonl'
grover_ds_hw_filename = 'Grover.human.jsonl'
grover_ds_mg = pd.read_json(os.path.join(grover_data_path, grover_ds_mg_filename), lines = True)
grover_ds_hw = pd.read_json(os.path.join(grover_data_path, grover_ds_hw_filename), lines = True)
grover_mg_pd = create_dataset(grover_ds_mg["text"], is_machine_generated=True)
grover_hw_pd = create_dataset(grover_ds_hw["text"], is_machine_generated=False)

human-written

In [None]:
grover_hw094_feat, grover_hw094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_hw_pd, vect)

In [180]:
grover_hw094_accuracy = model.score(grover_hw094_feat, grover_hw094_labels)*100.
grover_hw094_accuracy

74.66666666666667

machine-generated

In [None]:
grover_mg094_feat, grover_mg094_labels, _ = \
    get_features_and_labels_for_grover(
    grover_mg_pd, vect)

In [182]:
grover_mg094_accuracy = model.score(grover_mg094_feat, grover_mg094_labels)*100.
grover_mg094_accuracy

21.65