In [1]:
import json
import pandas as pd
import os

In [2]:
# Directory that load_dataset() prepends to dataset file names by default.
data_path = "./"
# Directory that the training-embedding pickles are written to further down.
embed_path = "./output/train/embed"

In [3]:
import pickle

def load_dataset(file_path, prefix_path=data_path):
    """Load a dataset, picking the reader from the file extension.

    Parameters
    ----------
    file_path : str
        File name (or relative path) of the dataset. Only its last extension
        is inspected; the comparison is case-insensitive.
    prefix_path : str, optional
        Directory joined in front of ``file_path``. The default is the value
        ``data_path`` had when this function was defined; reassigning
        ``data_path`` afterwards does not change it.

    Returns
    -------
    object or None
        The result of ``pd.read_csv`` for ``.csv``, of
        ``pd.read_json(..., lines=True)`` for ``.jsonl``, the unpickled
        object for ``.pickle``, and ``None`` for any other extension.
    """
    full_path = os.path.join(prefix_path, file_path)
    # Resolve the extension once and normalise its case so that e.g. ".CSV"
    # is handled like ".csv".
    extension = os.path.splitext(file_path)[-1].lower()
    if extension == ".csv":
        return pd.read_csv(full_path)
    if extension == ".jsonl":
        return pd.read_json(full_path, lines=True)
    if extension == ".pickle":
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(full_path, "rb") as f:
            return pickle.load(f)
    # Unknown extension: keep the historical contract of returning None.
    return None

# Words-embedding

## Load training datasets

In [21]:
# Training CSVs for the "real" and "fake" classes (fake set: xl-1542M).
# NOTE: other cells in this notebook assign these same names to different files.
real_data_file = 'webtext.train.jsonl.clean100k.csv'
fake_data_file = 'xl-1542M.train.jsonl.clean100k.csv'

In [151]:
# Alternative configuration: same real set, fake set taken from xl-1542M-k40.
real_data_file = 'webtext.train.jsonl.clean100k.csv'
fake_data_file = 'xl-1542M-k40.train.jsonl.clean100k.csv'

In [152]:
# Read both training sets (resolved against load_dataset's default directory).
df_real = load_dataset(real_data_file)
df_fake = load_dataset(fake_data_file)

# Real documents first, fake documents second; labels follow the same order
# (0 = real, 1 = fake).
corpus = [*df_real["text"].to_list(), *df_fake["text"].to_list()]
labels = [0] * len(df_real) + [1] * len(df_fake)

In [4]:
# Combined configuration: one real file and two fake files (k40 and non-k40).
real_data_file = 'webtext.train.jsonl.clean200k.csv'
fake_data_file_1 = 'xl-1542M-k40.train.jsonl.clean100k.csv'
fake_data_file_2 = 'xl-1542M.train.jsonl.clean100k.csv'

In [5]:
df_real = load_dataset(real_data_file)
# Stack the two fake sets into one frame; pd.concat keeps each frame's own
# row index, so index values may repeat.
df_fake = pd.concat(
    [load_dataset(name) for name in (fake_data_file_1, fake_data_file_2)]
)

# Real documents first, fake documents second; labels follow the same order
# (0 = real, 1 = fake).
corpus = [*df_real["text"].to_list(), *df_fake["text"].to_list()]
labels = [0] * len(df_real) + [1] * len(df_fake)

## Build vocabulary

In [6]:
from nlp_engine.preprocessing import transformers as tfs
from sklearn.pipeline import make_pipeline

In [7]:
# Raw tokenisation pipeline, applied before any vocabulary whitelist.
# NOTE(review): WordTokenizer / WordsFilter come from the project package
# nlp_engine, which is not visible here; presumably this splits text into word
# tokens and drops digit tokens while keeping symbols -- confirm.
tokenizer = make_pipeline(
    tfs.WordTokenizer(), 
    tfs.WordsFilter(drop_symbols=False, drop_digits=True)
)

In [9]:
# Tokenise the whole training corpus (real documents followed by fake ones).
tokenized_corpus = tokenizer.transform(corpus)

In [10]:
from nlp_engine.analysis import vocabulary

In [11]:
# corpus was built as real + fake, so len(df_real) is the split point between
# the two classes in tokenized_corpus.
vocab_real = vocabulary.get_vocabulary(tokenized_corpus[:len(df_real)])
vocab_fake = vocabulary.get_vocabulary(tokenized_corpus[len(df_real):])

In [12]:
# Entries present in both class vocabularies; passed as the whitelist of the
# preprocessing pipeline built later.
vocab_shared = vocab_real.intersection(vocab_fake)

In [13]:
# Report the size of each class vocabulary and of their intersection.
print(f'Size of "real" vocabulary: {len(vocab_real)}\nSize of "fake" vocabulary: {len(vocab_fake)}\nSize of intersection: {len(vocab_shared)}')

Size of "real" vocabulary: 500067
Size of "fake" vocabulary: 552962
Size of intersection: 211014


## Compute TF-IDF embedding

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

In [15]:
def identity(x):
    """Return ``x`` unchanged.

    Passed as both ``tokenizer`` and ``preprocessor`` of the TfidfVectorizer
    defined in this notebook, which is fed documents that were already
    tokenised by a separate pipeline.
    """
    return x

In [16]:
# Same steps as `tokenizer`, plus a whitelist restricted to vocab_shared.
# NOTE(review): presumably `whitelist` keeps only tokens found in the given
# set -- confirm against nlp_engine.preprocessing.transformers.WordsFilter.
preprocessing_pipeline = make_pipeline(
    tfs.WordTokenizer(),
    tfs.WordsFilter(
        drop_symbols=False, 
        drop_digits=True, 
        whitelist=vocab_shared)
)

In [17]:
%%time
# Re-tokenise the training corpus with the whitelist-restricted pipeline.
corpus_tokenized = preprocessing_pipeline.transform(corpus)

CPU times: user 3min 11s, sys: 2min 18s, total: 5min 30s
Wall time: 6min 44s


In [18]:
# Word embedding: TF-IDF over 1- to 3-grams of pre-tokenised documents,
# reduced to 500 dimensions with truncated SVD.
words_pipeline = make_pipeline(
    TfidfVectorizer(
        ngram_range=(1, 3),
        max_features=1000000,
        sublinear_tf=True,
        min_df=3,
        # Documents arrive as token lists, so tokenising and preprocessing
        # are no-ops.
        tokenizer=identity,
        preprocessor=identity,
        # The default regex is never used with a custom tokenizer; clearing
        # it avoids scikit-learn's "token_pattern will not be used" warning.
        token_pattern=None,
    ),
    # The default solver is randomized; a fixed seed makes the embedding
    # reproducible across kernel restarts.
    TruncatedSVD(n_components=500, random_state=42),
)

In [19]:
%%time
# Fit TF-IDF + SVD on the whole tokenised training corpus.
# NOTE(review): both pipeline steps look unsupervised, so `labels` is probably
# ignored here -- confirm.
words_pipeline.fit(corpus_tokenized, labels)



CPU times: user 55min 37s, sys: 59min 22s, total: 1h 55min
Wall time: 1h 52min 29s


Transform training data and store it

In [20]:
# corpus_tokenized keeps the order of `corpus` (real documents first, fake
# documents after), so len(df_real) is the split point -- the same slicing the
# vocabulary cell uses. Slicing from the front stays correct when df_fake is
# empty, whereas a negative slice `[-0:]` would return the whole list.
real_embed = words_pipeline.transform(corpus_tokenized[:len(df_real)])
fake_embed = words_pipeline.transform(corpus_tokenized[len(df_real):])

In [22]:
import pickle

# Output directory for the training embeddings.
embed_path = "./output/train/embed"

# Real set: the '.jsonl.clean200k.csv' suffix is stripped from real_data_file,
# so 'webtext.train.jsonl.clean200k.csv' yields 'webtext.train.human_embed.pickle'.
# File names with a different suffix (e.g. clean100k) are left unstripped.
with open(os.path.join(
    embed_path, f"{real_data_file.replace('.jsonl.clean200k.csv','')}.human_embed.pickle"), "wb") as f:
    pickle.dump(real_embed, f)
# Fake set: the name is derived from a hard-coded literal, not from the loaded
# file names; it always resolves to 'xl-1542M+k40.train.machine_embed.pickle'.
with open(os.path.join(
    embed_path, f"{'xl-1542M+k40.train.jsonl.clean100k.csv'.replace('.jsonl.clean100k.csv','')}.machine_embed.pickle"), "wb") as f:
    pickle.dump(fake_embed, f)

## Embed test datasets

In [23]:
test_path = "./data/test"
# Regular, non-hidden files directly inside test_path, in sorted order.
test_dataset_files = sorted(
    name
    for name in os.listdir(test_path)
    if os.path.isfile(os.path.join(test_path, name)) and name[0] != "."
)

Store text stats

In [24]:
stats_path = "./output/test/stats"

# One CSV per test file: character and token counts under the raw tokenizer
# and under the whitelist-restricted pipeline, plus the ratio of the two token
# counts.
for test in test_dataset_files:
    with open(os.path.join(test_path, test), "rb") as f:
        X = pickle.load(f)
    X_tokens_raw = tokenizer.transform(X)
    X_tokens = preprocessing_pipeline.transform(X)
    stats = [
        [
            len("".join(tokens_raw)),
            len(tokens_raw),
            len("".join(tokens)),
            len(tokens),
            # Documents without any raw token get a ratio of 0.
            len(tokens) / len(tokens_raw) if len(tokens_raw) > 0 else 0,
        ]
        for tokens_raw, tokens in zip(X_tokens_raw, X_tokens)
    ]
    df = pd.DataFrame(stats, columns=["raw_chars", "raw_tokens", "chars", "tokens", "ratio"])
    stats_file = f"{os.path.splitext(test)[0]}_stats.csv"
    df.to_csv(os.path.join(stats_path, stats_file))

In [25]:
embed_path = "./output/test/embed"

# Embed every test file with the fitted word pipeline and store the result as
# '<file stem>_embed.pickle' under embed_path.
for test in test_dataset_files:
    source_file = os.path.join(test_path, test)
    target_file = os.path.join(embed_path, f"{os.path.splitext(test)[0]}_embed.pickle")
    with open(source_file, "rb") as f:
        X = pickle.load(f)
    X_tokens = preprocessing_pipeline.transform(X)
    X_embed = words_pipeline.transform(X_tokens)
    with open(target_file, "wb") as f:
        pickle.dump(X_embed, f)

# TAG-embedding

In [4]:
# Input and embedding directories for the TAG variant of this section.
# NOTE: the next cell assigns the same names for the entities variant.
data_path = "./data/tag"
embed_path = "./data/tag/embed"

In [4]:
# Input and embedding directories for the entities variant of this section.
data_path = "./data/entities"
embed_path = "./data/entities/embed"

## Load training datasets

In [31]:
# Pickled tag sequences for the real set and the xl-1542M fake set.
real_data_file = 'webtext.train.jsonl.clean100k.csv.tag.pickle'
fake_data_file = 'xl-1542M.train.jsonl.clean100k.csv.tag.pickle'

In [173]:
# Alternative configuration: fake set taken from xl-1542M-k40.
real_data_file = 'webtext.train.jsonl.clean100k.csv.tag.pickle'
fake_data_file = 'xl-1542M-k40.train.jsonl.clean100k.csv.tag.pickle'

In [174]:
# data_path is passed explicitly because load_dataset's default prefix was
# bound when the function was defined and does not follow later reassignments.
ds_real = load_dataset(real_data_file, data_path)
ds_fake = load_dataset(fake_data_file, data_path)

In [5]:
# Combined tag configuration: one real file and two fake files.
real_data_file = 'webtext.train.jsonl.clean200k.csv.tag.pickle'
fake_data_file_1 = 'xl-1542M-k40.train.jsonl.clean100k.csv.tag.pickle'
fake_data_file_2 = 'xl-1542M.train.jsonl.clean100k.csv.tag.pickle'

ds_real = load_dataset(real_data_file, data_path)
# NOTE(review): `+` assumes each pickle holds a list (one entry per document),
# so the two fake sets are concatenated -- confirm the pickled type.
ds_fake = load_dataset(fake_data_file_1, data_path) + load_dataset(fake_data_file_2, data_path)

In [5]:
# Combined entities configuration: one real file and two fake files.
real_data_file = 'webtext.train.jsonl.clean200k.csv.entities.pickle'
fake_data_file_1 = 'xl-1542M-k40.train.jsonl.clean100k.csv.entities.pickle'
fake_data_file_2 = 'xl-1542M.train.jsonl.clean100k.csv.entities.pickle'

ds_real = load_dataset(real_data_file, data_path)
# NOTE(review): `+` assumes each pickle holds a list (one entry per document),
# so the two fake sets are concatenated -- confirm the pickled type.
ds_fake = load_dataset(fake_data_file_1, data_path) + load_dataset(fake_data_file_2, data_path)

In [6]:
# Real entries first, fake entries second; labels follow the same order
# (0 = real, 1 = fake).
corpus = ds_real + ds_fake
labels = [0] * len(ds_real) + [1] * len(ds_fake)

## Compute TF-IDF embedding

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline

In [8]:
def identity(x):
    """Return ``x`` unchanged.

    Same definition as in the words-embedding section; repeated so this
    section can run on its own. Used as ``tokenizer`` and ``preprocessor`` of
    the TfidfVectorizer in ``tags_pipeline``.
    """
    return x

In [9]:
# Tag/entity embedding: TF-IDF over 3- to 5-grams of the pre-tokenised
# sequences, reduced to 300 dimensions with truncated SVD.
tags_pipeline = make_pipeline(
    TfidfVectorizer(
        ngram_range=(3, 5),
        max_features=1000000,
        sublinear_tf=True,
        # Inputs are already sequences of tokens, so tokenising and
        # preprocessing are no-ops.
        tokenizer=identity,
        preprocessor=identity,
        # The default regex is never used with a custom tokenizer; clearing
        # it avoids scikit-learn's "token_pattern will not be used" warning.
        token_pattern=None,
    ),
    # The default solver is randomized; a fixed seed makes the embedding
    # reproducible across kernel restarts.
    TruncatedSVD(n_components=300, random_state=42),
)

In [10]:
%%time
# Fit TF-IDF + SVD on the combined real + fake sequences.
# NOTE(review): both pipeline steps look unsupervised, so `labels` is probably
# ignored here -- confirm.
tags_pipeline.fit(corpus, labels)



CPU times: user 8min, sys: 1min 55s, total: 9min 55s
Wall time: 3min 3s


Transform training data and store it

In [11]:
# Embed each class separately with the fitted pipeline.
# NOTE: these names are also used for the word embeddings earlier on.
real_embed = tags_pipeline.transform(ds_real)
fake_embed = tags_pipeline.transform(ds_fake)

In [12]:
import pickle

# Output directory for the training embeddings.
embed_path = "./output/train/embed"

# Real set: '.jsonl.clean200k.csv' and '.pickle' are stripped from
# real_data_file, e.g. '...clean200k.csv.entities.pickle' becomes
# 'webtext.train.entities.human_embed.pickle'.
with open(os.path.join(
    embed_path, f"{real_data_file.replace('.jsonl.clean200k.csv','').replace('.pickle','')}.human_embed.pickle"), "wb") as f:
    pickle.dump(real_embed, f)
# Fake set: the name comes from a hard-coded literal containing 'entities', so
# the file is always 'xl-1542M+k40.train.entities.machine_embed.pickle', even
# when the tag files were the ones loaded.
with open(os.path.join(
    embed_path, f"{'xl-1542M+k40.train.jsonl.clean100k.csv.entities.pickle'.replace('.jsonl.clean100k.csv','').replace('.pickle','')}.machine_embed.pickle"), "wb") as f:
    pickle.dump(fake_embed, f)

## Embed test datasets

In [13]:
test_path = "./data/tag/test"
embed_path = "./output/test/embed"
# Regular, non-hidden files in test_path whose name contains "tag", sorted.
test_dataset_files = sorted(
    name
    for name in os.listdir(test_path)
    if os.path.isfile(os.path.join(test_path, name))
    and name[0] != "."
    and "tag" in name
)

In [16]:
test_path = "./data/entities/test"
embed_path = "./output/test/embed"
# Regular, non-hidden files in test_path whose name contains "entities", sorted.
test_dataset_files = sorted(
    name
    for name in os.listdir(test_path)
    if os.path.isfile(os.path.join(test_path, name))
    and name[0] != "."
    and "entities" in name
)

In [17]:
# Embed every listed test file with the fitted tag pipeline; the output name
# is the input name with '.pickle' replaced by '_embed.pickle'.
for test in test_dataset_files:
    target_file = os.path.join(embed_path, test.replace('.pickle','_embed.pickle'))
    with open(os.path.join(test_path, test), "rb") as f:
        data = pickle.load(f)
    X_embed = tags_pipeline.transform(data)
    with open(target_file, "wb") as f:
        pickle.dump(X_embed, f)

# Classification

## Load training data

In [18]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import xgboost as xgb
import multiprocessing
import pickle
import numpy as np
import pandas as pd

In [49]:
# Pickled training embeddings: word TF-IDF and tag features, fake set xl-1542M.
# NOTE: the following cells assign the same four names to other file variants.
tfidf_real_path = "./output/train/embed/webtext.train.human_embed.pickle"
tfidf_fake_path = "./output/train/embed/xl-1542M.train.machine_embed.pickle"
tag_real_path = "./output/train/embed/webtext.train.tag.human_embed.pickle"
tag_fake_path = "./output/train/embed/xl-1542M.train.tag.machine_embed.pickle"

In [185]:
# Variant: fake set xl-1542M-k40, tag features.
tfidf_real_path = "./output/train/embed/webtext.train.human_embed.pickle"
tfidf_fake_path = "./output/train/embed/xl-1542M-k40.train.machine_embed.pickle"
tag_real_path = "./output/train/embed/webtext.train.tag.human_embed.pickle"
tag_fake_path = "./output/train/embed/xl-1542M-k40.train.tag.machine_embed.pickle"

In [16]:
# Variant: combined fake set (xl-1542M+k40), tag features.
tfidf_real_path = "./output/train/embed/webtext.train.human_embed.pickle"
tfidf_fake_path = "./output/train/embed/xl-1542M+k40.train.machine_embed.pickle"
tag_real_path = "./output/train/embed/webtext.train.tag.human_embed.pickle"
tag_fake_path = "./output/train/embed/xl-1542M+k40.train.tag.machine_embed.pickle"

In [19]:
# Variant: combined fake set (xl-1542M+k40), entities features stored under
# the tag_* names.
tfidf_real_path = "./output/train/embed/webtext.train.human_embed.pickle"
tfidf_fake_path = "./output/train/embed/xl-1542M+k40.train.machine_embed.pickle"
tag_real_path = "./output/train/embed/webtext.train.entities.human_embed.pickle"
tag_fake_path = "./output/train/embed/xl-1542M+k40.train.entities.machine_embed.pickle"

In [20]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as f:
        return pickle.load(f)


# Word TF-IDF features: real rows first, fake rows second.
X_tfidf_real = _load_pickle(tfidf_real_path)
X_tfidf_fake = _load_pickle(tfidf_fake_path)
X_tfidf = np.concatenate([X_tfidf_real, X_tfidf_fake])
# Labels in the same order (0 = real, 1 = fake).
y_train = [0] * len(X_tfidf_real) + [1] * len(X_tfidf_fake)

# Tag/entity features, stacked in the same real-then-fake order.
X_tag_real = _load_pickle(tag_real_path)
X_tag_fake = _load_pickle(tag_fake_path)
X_tag = np.concatenate([X_tag_real, X_tag_fake])

# Column-wise concatenation of both feature sets.
X_tfidf_tag = np.concatenate([X_tfidf, X_tag], axis=1)

## Train models

In [24]:
def _xgb_grid_search(param_grid):
    """Build a GridSearchCV around a histogram-based XGBClassifier.

    Each classifier gets half of the CPU count as ``n_jobs`` and the search
    itself runs two jobs in parallel.
    """
    return GridSearchCV(
        xgb.XGBClassifier(n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist"),
        param_grid,
        verbose=1,
        n_jobs=2,
    )


# Full grid: max_depth in {3, 4, 5} x n_estimators in {500, 1000, 1500}.
# The word-based models are pinned to a single candidate; only the tag model
# searches the whole grid.
clf_tfidf = _xgb_grid_search({"max_depth": [3], "n_estimators": [1500]})

clf_tag = _xgb_grid_search({"max_depth": [3, 4, 5], "n_estimators": [500, 1000, 1500]})

clf_tfidf_tag = _xgb_grid_search({"max_depth": [3], "n_estimators": [1500]})

In [21]:
%%time
# Grid-search the classifier on the word TF-IDF features only.
clf_tfidf.fit(X_tfidf, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
CPU times: user 7min 8s, sys: 12.3 s, total: 7min 20s
Wall time: 5min 57s


In [53]:
# Cross-validation results of the word TF-IDF search, one row per candidate.
pd.DataFrame(clf_tfidf.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,55.747911,6.012172,0.096856,0.005631,3,1500,"{'max_depth': 3, 'n_estimators': 1500}",0.716675,0.719025,0.71525,0.716375,0.7159,0.716645,0.001284,1


In [25]:
%%time
# Grid-search the classifier on the tag/entity features only.
clf_tag.fit(X_tag, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits




CPU times: user 1min 48s, sys: 4.59 s, total: 1min 52s
Wall time: 21min 28s


In [26]:
# Cross-validation results of the tag/entity search, one row per candidate.
pd.DataFrame(clf_tag.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,25.875197,1.244393,0.089483,0.008272,3,500,"{'max_depth': 3, 'n_estimators': 500}",0.553525,0.553388,0.547825,0.507838,0.50445,0.533405,0.022379,1
1,46.97008,0.365412,0.122043,0.003603,3,1000,"{'max_depth': 3, 'n_estimators': 1000}",0.551737,0.547312,0.546087,0.508175,0.504575,0.531578,0.020695,2
2,67.95938,1.257973,0.162379,0.009714,3,1500,"{'max_depth': 3, 'n_estimators': 1500}",0.54995,0.5453,0.544462,0.510437,0.505112,0.531053,0.019172,3
3,27.848402,0.364825,0.107259,0.003427,4,500,"{'max_depth': 4, 'n_estimators': 500}",0.54585,0.54855,0.5445,0.51155,0.50345,0.53078,0.019224,4
4,54.660727,2.154009,0.169057,0.00997,4,1000,"{'max_depth': 4, 'n_estimators': 1000}",0.543362,0.542525,0.541025,0.511075,0.505075,0.528613,0.016892,5
5,79.395381,0.960192,0.213818,0.004227,4,1500,"{'max_depth': 4, 'n_estimators': 1500}",0.542813,0.5402,0.539587,0.510425,0.5055,0.527705,0.016231,7
6,33.041631,0.336686,0.127507,0.003447,5,500,"{'max_depth': 5, 'n_estimators': 500}",0.5441,0.542987,0.540562,0.507387,0.507012,0.52841,0.017356,6
7,66.473216,2.049366,0.206104,0.008456,5,1000,"{'max_depth': 5, 'n_estimators': 1000}",0.5402,0.540325,0.539312,0.50765,0.50815,0.527127,0.015704,8
8,94.692232,4.145628,0.289195,0.031678,5,1500,"{'max_depth': 5, 'n_estimators': 1500}",0.53745,0.537112,0.536475,0.507475,0.508413,0.525385,0.014247,9


In [23]:
%%time
# Grid-search the classifier on the concatenated word + tag/entity features.
clf_tfidf_tag.fit(X_tfidf_tag, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
CPU times: user 12min 5s, sys: 16.7 s, total: 12min 21s
Wall time: 10min 44s


In [57]:
# Cross-validation results of the combined-feature search.
pd.DataFrame(clf_tfidf_tag.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,92.510934,13.71437,0.12882,0.018558,3,1500,"{'max_depth': 3, 'n_estimators': 1500}",0.736575,0.736175,0.7371,0.736225,0.738625,0.73694,0.000905,1


## Test models

In [24]:
from sklearn.metrics import accuracy_score

In [25]:
embed_path = "./output/test/embed"
# Regular, non-hidden embedding files whose name contains "tag", sorted.
test_dataset_tag_files = sorted(
    name
    for name in os.listdir(embed_path)
    if os.path.isfile(os.path.join(embed_path, name))
    and name[0] != "."
    and "tag" in name
)

In [26]:
# Name of the matching word-embedding file: the tag file name without ".tag".
test_dataset_files = [x.replace(".tag","") for x in test_dataset_tag_files]

In [135]:
# Show the derived word-embedding file names.
test_dataset_files

['gpt2.large-762M-k40.test.machine_embed.pickle',
 'gpt2.large-762M.test.machine_embed.pickle',
 'gpt2.medium-345M-k40.test.machine_embed.pickle',
 'gpt2.medium-345M.test.machine_embed.pickle',
 'gpt2.small-117M-k40.test.machine_embed.pickle',
 'gpt2.small-117M.test.machine_embed.pickle',
 'gpt2.xl-1542M-k40.test.machine_embed.pickle',
 'gpt2.xl-1542M.test.machine_embed.pickle',
 'gpt3.175b_samples.machine_embed.pickle',
 'grover-base-p0.94.machine_embed.pickle',
 'grover-medium-p0.94.machine_embed.pickle',
 'grover-mega-p0.94.machine_embed.pickle',
 'grover.human_embed.pickle',
 'webtext.test.human_embed.pickle']

In [27]:
# One result row per test set:
# [dataset name, source, size, accuracy (words), accuracy (tags), accuracy (words + tags)]
results = []
for test_dataset_tag_file in test_dataset_tag_files:
    test_dataset_file = test_dataset_tag_file.replace(".tag","")
    # The file name is the only source of the ground-truth class.
    is_machine = "machine" in test_dataset_file

    with open(os.path.join(embed_path, test_dataset_file), "rb") as f:
        X_test_tfidf = pickle.load(f)
    with open(os.path.join(embed_path, test_dataset_tag_file), "rb") as f:
        X_test_tag = pickle.load(f)
    X_test_tfidf_tag = np.concatenate([X_test_tfidf, X_test_tag], axis=1)

    # Every document in a test file carries the same label.
    label = int(is_machine)
    y_test = [label] * len(X_test_tfidf)

    results_line = [
        # Drop the trailing "<source>_embed" and "pickle" name components.
        ".".join(test_dataset_file.split(".")[:-2]),
        "machine" if is_machine else "human",
        len(y_test),
    ]
    for model, features in (
        (clf_tfidf, X_test_tfidf),
        (clf_tag, X_test_tag),
        (clf_tfidf_tag, X_test_tfidf_tag),
    ):
        y_pred = [round(yy) for yy in model.best_estimator_.predict(features)]
        results_line.append(accuracy_score(y_test, y_pred))
    results.append(results_line)

In [29]:
# Persist the per-dataset accuracy table in the current working directory.
pd.DataFrame(results, columns=["dataset", "source", "size", "tfidf", "tag", "tfidf_tag"]).to_csv("combined_test_accuracy.csv")

In [28]:
# Display the per-dataset accuracy table.
pd.DataFrame(results, columns=["dataset", "source", "size", "tfidf", "tag", "tfidf_tag"])

Unnamed: 0,dataset,source,size,tfidf,tag,tfidf_tag
0,gpt2.large-762M-k40.test,machine,5000,0.8048,0.798,0.8392
1,gpt2.large-762M.test,machine,5000,0.484,0.4262,0.528
2,gpt2.medium-345M-k40.test,machine,5000,0.7748,0.7966,0.8278
3,gpt2.medium-345M.test,machine,5000,0.5094,0.4892,0.6068
4,gpt2.small-117M-k40.test,machine,5000,0.8326,0.8292,0.8688
5,gpt2.small-117M.test,machine,5000,0.5192,0.4336,0.5848
6,gpt2.xl-1542M-k40.test,machine,5000,0.7616,0.7648,0.7962
7,gpt2.xl-1542M.test,machine,5000,0.4498,0.4126,0.484
8,gpt3.175b_samples,machine,485,0.678351,0.68866,0.707216
9,grover-base-p0.94,human,15000,0.791067,0.7188,0.815133


Analyze performance vs document stats

In [30]:
from sklearn.metrics import precision_recall_fscore_support

In [37]:
# Directories holding the test embeddings and the per-document statistics.
embed_path = "./output/test/embed"
stats_path = "./output/test/stats"
# File-name stems of the test sets analysed below; "machine" in the stem is
# what marks a set as label 1.
test_datasets = ["webtext.test.human", "gpt2.xl-1542M.test.machine"]

In [38]:
# Collect the per-dataset pieces first and concatenate once at the end, rather
# than re-copying the growing arrays and frame on every iteration.
tfidf_parts = []
tag_parts = []
stats_parts = []
y_test = []
for test_ds in test_datasets:
    with open(os.path.join(embed_path, f"{test_ds}_embed.pickle"), "rb") as f:
        ds_embed = pickle.load(f)
    tfidf_parts.append(ds_embed)
    # The class is encoded in the dataset name (1 = machine, 0 = human).
    label = int("machine" in test_ds)
    y_test += [label] * len(ds_embed)
    with open(os.path.join(embed_path, f"{test_ds}.tag_embed.pickle"), "rb") as f:
        tag_parts.append(pickle.load(f))
    stats_parts.append(
        pd.read_csv(os.path.join(stats_path, f"{test_ds}_stats.csv"), index_col=0)
    )

X_test_tfidf = np.concatenate(tfidf_parts)
X_test_tag = np.concatenate(tag_parts)
# Row order matches the embeddings above; each dataset keeps its own index,
# so index values repeat across datasets.
df_stats = pd.concat(stats_parts)
X_test_tfidf_tag = np.concatenate([X_test_tfidf, X_test_tag], axis=1)

In [39]:
def _predict_labels(search, features):
    """Predict with the best estimator of ``search`` and round each output."""
    return [round(yy) for yy in search.best_estimator_.predict(features)]


y_pred_tfidf = _predict_labels(clf_tfidf, X_test_tfidf)
y_pred_tag = _predict_labels(clf_tag, X_test_tag)
y_pred_tfidf_tag = _predict_labels(clf_tfidf_tag, X_test_tfidf_tag)

In [40]:
# Attach the ground truth and the three models' predictions to the stats frame.
# The lists are assigned positionally, so they must be in df_stats row order.
# NOTE: this mutates df_stats in place.
df_stats["label"] = y_test
df_stats["label_tfidf"] = y_pred_tfidf
df_stats["label_tag"] = y_pred_tag
df_stats["label_tfidf_tag"] = y_pred_tfidf_tag

In [41]:
# For every distinct token count i (ascending), score each model on the
# documents with at least i tokens.
class_stats = dict(tfidf=[], tag=[], tfidf_tag=[])
for i in sorted(set(df_stats.tokens.to_list())):
    df = df_stats[df_stats.tokens >= i]
    # stop if sample size below 50 elements
    if len(df) < 50:
        break
    # The ground truth is the same for every method; compute it once.
    y_true = df.label.to_list()
    for method in class_stats.keys():
        y_pred = df[f"label_{method}"].to_list()
        acc = accuracy_score(y_true, y_pred)
        prec, rec, fsc, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
        # labels=[0, 1] forces one entry per class even when a subset holds a
        # single class, so every row has the 13 values the CSV columns expect.
        # scikit-learn reports 0 (with a warning) for an absent class.
        per_class = precision_recall_fscore_support(y_true, y_pred, average=None, labels=[0, 1])
        # Flattened order: precision (human, machine), recall, f1, support.
        st = [i, acc, prec, rec, fsc] + [item for sublist in per_class for item in list(sublist)]
        class_stats[method].append(st)

In [42]:
# Column layout of each row collected in class_stats: threshold, accuracy, the
# three weighted-average scores, then per-class precision / recall / f1 /
# support (human first, machine second).
stats_columns = [
    "min_tokens", "accuracy", "precision_m", "recall_m", "f1_m",
    "precision_human", "precision_machine",
    "recall_human", "recall_machine",
    "f1_human", "f1_machine",
    "support_human", "support_machine",
]
# One CSV per method, written to the current working directory.
for method, rows in class_stats.items():
    pd.DataFrame(rows, columns=stats_columns).to_csv(f"{method}_stats.csv")