# Evaluate Baseline Model (GPT2-Detector)

## Note

**Note**: According to https://github.com/openai/gpt-2-output-dataset/issues/28, it is required to use
* `transformers == 2.9.1`
* `tokenizers == 0.7.0`

> WARNING: to install these two specific versions, you MUST use Python 3.8
>
> Because
> 1. pip has no pre-compiled wheel of tokenizers v0.7.0 for Python 3.9+
> 2. to compile tokenizers from source, you need to install the Rust compiler
> 3. the Rust source code of tokenizers 0.7.0 uses a deprecated language feature that the latest Rust compiler rejects (as an error, not a warning)
> 4. so you cannot install tokenizers 0.7.0 unless your environment uses Python 3.8


These pinned versions are needed to make things work: installing a later version of `transformers` leads to an error when loading the weight file.

In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path
from transformers import RobertaForSequenceClassification, RobertaTokenizer

from tqdm.notebook import tqdm

from Model import SentinelNonLinear, Sentinel, T5Sentinel
from Dataset import OpenGPTDataset, GPT2_OutputDataset, download_gpt2
from memoizer import memoize

In [2]:
# Project root is three directory levels above the current working directory.
PATH_ROOT = Path.cwd().parent.parent.parent
PATH_WEBTEXT = PATH_ROOT / "data" / "open-web-text-split"
PATH_GPTTEXT = PATH_ROOT / "data" / "open-gpt-text-split"
PATH_GPT2_OUTPUT = PATH_ROOT / "data" / "gpt2-output"

PATH_CACHE = PATH_ROOT / "result" / "cache"

PATH_GPT2_CHECKPT = PATH_CACHE / "detector-base.pt"

# Name passed to `memoize` alongside each cache file path.
SELF_NAME = "evaluate_gpt2_detector.ipynb"

# `iterdir()` raises FileNotFoundError on a missing directory, so make sure the
# target exists before checking whether it is empty (e.g. on a fresh checkout).
PATH_GPT2_OUTPUT.mkdir(parents=True, exist_ok=True)
if any(PATH_GPT2_OUTPUT.iterdir()):
    print("GPT2 output dataset already downloaded.")
else:
    download_gpt2(PATH_GPT2_OUTPUT)

GPT2 output dataset already downloaded.


In [3]:
# OpenGPTText test split: human-written (web) file first, GPT-rewritten file second.
clean_web_file = PATH_WEBTEXT / "test.jsonl"
clean_gpt_file = PATH_GPTTEXT / "test.jsonl"
clean_opengpt_test = OpenGPTDataset(str(clean_web_file), str(clean_gpt_file))

# Same split, read from the "-dirty" variants of the files.
dirty_web_file = PATH_WEBTEXT / "test-dirty.jsonl"
dirty_gpt_file = PATH_GPTTEXT / "test-dirty.jsonl"
dirty_opengpt_test = OpenGPTDataset(str(dirty_web_file), str(dirty_gpt_file))

# GPT2-Output test split: GPT-2 XL (1542M) generations paired with WebText.
gpt2xl_output_test = GPT2_OutputDataset(
    PATH_GPT2_OUTPUT / "xl-1542M.test.jsonl",
    PATH_GPT2_OUTPUT / "webtext.test.jsonl",
)

Loading Human text: 100%|███████████████████████████████████| 5000/5000 [00:00<00:00, 127720.14it/s]
Loading GPT text: 100%|█████████████████████████████████████| 5000/5000 [00:00<00:00, 123840.49it/s]

<All data loaded>





In [4]:
def quick_statistics(prediction):
    """Tally confusion-matrix counts from a mapping of predictions.

    "Positive" means GPT-generated. A sample is truly positive when its key
    ends with "gpt", and predicted positive when the first score of its entry
    is strictly greater than the second one.

    Args:
        prediction: mapping from sample key to an indexable pair
            (p_gpt, p_web).

    Returns:
        Tuple (TP, TN, FP, FN) of integer counts.
    """
    counts = {"TP": 0, "TN": 0, "FP": 0, "FN": 0}
    for sample_key, scores in prediction.items():
        flagged_as_gpt = scores[0] > scores[1]
        if sample_key.endswith("gpt"):
            # Real GPT text: either caught (TP) or missed (FN).
            counts["TP" if flagged_as_gpt else "FN"] += 1
        else:
            # Real human text: either wrongly flagged (FP) or cleared (TN).
            counts["FP" if flagged_as_gpt else "TN"] += 1

    return counts["TP"], counts["TN"], counts["FP"], counts["FN"]

def report_statistics(TP, TN, FP, FN):
    """Print a human-readable summary and a LaTeX table row for a confusion matrix.

    Percent signs are emitted as a backslash followed by "%" so the printed
    text can be pasted into LaTeX. A rate whose denominator is zero (for
    example when the evaluation set contains a single class) is reported as
    nan instead of raising ZeroDivisionError.

    Args:
        TP: number of true positives.
        TN: number of true negatives.
        FP: number of false positives.
        FN: number of false negatives.

    Returns:
        None. Everything is written to standard output.
    """
    def _ratio(numerator, denominator):
        # Undefined rates become NaN rather than crashing the whole report.
        return numerator / denominator if denominator else float("nan")

    def _pct(ratio):
        # "\\%" is a literal backslash + percent sign (LaTeX-escaped percent).
        return f"{round(ratio * 100, 2)}\\%"

    TPR = _ratio(TP, TP + FN)
    TNR = _ratio(TN, TN + FP)
    FPR = _ratio(FP, FP + TN)
    FNR = _ratio(FN, FN + TP)
    accuracy = _ratio(TP + TN, TP + TN + FP + FN)
    f1_score = _ratio(TP, TP + 0.5 * (FP + FN))

    print(f"True Positive: {TP} \t| True Negative: {TN}")
    print(f"False Positive:{FP} \t| False Negative:{FN}")
    print(f"True Positive Rate:  {_pct(TPR)}")
    print(f"True Negative Rate:  {_pct(TNR)}")
    print(f"False Positive Rate: {_pct(FPR)}")
    print(f"False Negative Rate: {_pct(FNR)}")
    print(f"Accuracy: {_pct(accuracy)}")
    print(f"F1 Score: {round(f1_score, 2)}")

    print("LaTeX Usable-version\n\n")

    # Every cell is a separate argument so print() puts a space on both sides
    # of each "&" column separator.
    print(
        _pct(accuracy), "&",
        f"{_pct(TPR)}, ({TP})", "&",
        f"{_pct(TNR)}, ({TN})", "&",
        f"{_pct(FPR)}, ({FP})", "&",
        f"{_pct(FNR)}, ({FN})", "\\\\"
    )

In [5]:
def inference(text, model, tokenizer):
    """Score a single text with the detector.

    Args:
        text: the string to classify.
        model: classifier whose call returns a sequence with the logits as its
            first element.
        tokenizer: tokenizer exposing `encode`, `max_len`, `bos_token_id` and
            `eos_token_id`.

    Returns:
        Flat numpy array of softmax probabilities, [P[fake], P[real]].
    """
    # encode
    # NOTE(review): if `tokenizer.encode` already adds special tokens, the
    # sequence below ends up with duplicated BOS/EOS - confirm against the
    # pinned transformers version before changing anything here.
    token_ids = tokenizer.encode(text, max_length=512)
    token_ids = token_ids[:tokenizer.max_len - 2]  # leave room for BOS + EOS
    tokens = torch.tensor(
        [tokenizer.bos_token_id] + token_ids + [tokenizer.eos_token_id]
    ).unsqueeze(0)
    mask = torch.ones_like(tokens)

    # Run on whatever device the model lives on instead of assuming "cuda",
    # so the function also works on CPU-only machines.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else tokens.device

    # forward propagation
    with torch.inference_mode():
        logits = model(tokens.to(device), attention_mask=mask.to(device))[0]
        probs = logits.softmax(dim=-1)

    return probs.detach().cpu().flatten().numpy()

## Evaluate GPT2-Detector

### GPT2-Detector on OpenGPTText-Final

In [6]:
# Deserialize onto the CPU first: without `map_location`, torch.load restores
# tensors onto the device they were saved from, which fails when that device
# is unavailable. load_state_dict copies the values into the model regardless.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
checkpoint = torch.load(PATH_GPT2_CHECKPT, map_location="cpu")
model = RobertaForSequenceClassification.from_pretrained("roberta-base")
tokenizer = RobertaTokenizer.from_pretrained("roberta-base", truncation=True, max_length=510)
model.load_state_dict(checkpoint["model_state_dict"])
model = model.to("cuda")
# Make evaluation mode explicit so dropout is disabled during scoring.
model.eval()
print("Model Loaded")

Model Loaded


In [7]:
@memoize(Path(PATH_CACHE, "eval_gpt2_opengpt_final.pt"), SELF_NAME)
def calculate_clean():
    """Score every sample of `clean_opengpt_test` with the detector.

    Returns:
        dict mapping "<uid>-web" (label 0) or "<uid>-gpt" (any other label)
        to the probability array returned by `inference`.
    """
    results = {}
    for i in tqdm(range(len(clean_opengpt_test))):
        sample, meta = clean_opengpt_test[i]
        text, label = sample
        uid, _ = meta
        suffix = "-web" if label == 0 else "-gpt"
        results[uid + suffix] = inference(text, model, tokenizer)
    return results

prediction_clean = calculate_clean()

Reusing existing cache from /mnt/d/Projects/GPT-Sentinel/result/cache/eval_gpt2_opengpt_final.pt
Cache is generated by evaluate_gpt2_detector.ipynb


In [8]:
# Confusion-matrix summary for the OpenGPTText-Final predictions.
print("GPT2-Detector on OpenGPTText-Final")
clean_counts = quick_statistics(prediction_clean)
report_statistics(*clean_counts)

GPT2-Detector on OpenGPTText-Final
True Positive: 389 	| True Negative: 1233
False Positive:36 	| False Negative:2555
True Positive Rate:  13.21\%
True Negative Rate:  97.16\%
False Positive Rate: 2.84\%
False Negative Rate: 86.79\%
Accuracy: 38.5\%
F1 Score: 0.23
LaTeX Usable-version


38.5\% &13.21\%, (389) & 97.16\%, (1233) & 2.84\%, (36) & 86.79\%, (2555) \\


### GPT2-Detector on OpenGPTText-Original

In [9]:
@memoize(Path(PATH_CACHE, "eval_gpt2_opengpt_original.pt"), SELF_NAME)
def calculate_original():
    """Score every sample of `dirty_opengpt_test` with the detector.

    Returns:
        dict mapping "<uid>web" (label 0) or "<uid>gpt" (any other label)
        to the probability array returned by `inference`.
    """
    scores_by_uid = {}
    for position in tqdm(range(len(dirty_opengpt_test))):
        (text, label), (uid, _) = dirty_opengpt_test[position]
        scores = inference(text, model, tokenizer)
        if label == 0:
            tagged_uid = uid + "web"
        else:
            tagged_uid = uid + "gpt"
        scores_by_uid[tagged_uid] = scores
    return scores_by_uid

prediction_orig = calculate_original()

Reusing existing cache from /mnt/d/Projects/GPT-Sentinel/result/cache/eval_gpt2_opengpt_original.pt
Cache is generated by evaluate_gpt2_detector.ipynb


In [10]:
# This cell reports the OpenGPTText-Original predictions; the banner previously
# repeated the "Final" label from the preceding section.
print("GPT2-Detector on OpenGPTText-Original")
report_statistics(*quick_statistics(prediction_orig))

GPT2-Detector on OpenGPTText-Final
True Positive: 378 	| True Negative: 1217
False Positive:52 	| False Negative:2566
True Positive Rate:  12.84\%
True Negative Rate:  95.9\%
False Positive Rate: 4.1\%
False Negative Rate: 87.16\%
Accuracy: 37.86\%
F1 Score: 0.22
LaTeX Usable-version


37.86\% &12.84\%, (378) & 95.9\%, (1217) & 4.1\%, (52) & 87.16\%, (2566) \\


### GPT2-Detector on GPT2-Output

In [11]:
@memoize(Path(PATH_CACHE, "eval_gpt2_gpt2_output.pt"), SELF_NAME)
def calculate_gpt2():
    """Score every sample of `gpt2xl_output_test` with the detector.

    Samples carry no uid here, so the dataset index is used instead.

    Returns:
        dict mapping "<index>web" (label 0) or "<index>gpt" (any other label)
        to the probability array returned by `inference`.
    """
    scored = {}
    n_samples = len(gpt2xl_output_test)
    for idx in tqdm(range(n_samples)):
        text, label = gpt2xl_output_test[idx]
        probabilities = inference(text, model, tokenizer)
        origin = "web" if label == 0 else "gpt"
        scored[str(idx) + origin] = probabilities
    return scored

gpt2_prediction_gpt2 = calculate_gpt2()

Reusing existing cache from /mnt/d/Projects/GPT-Sentinel/result/cache/eval_gpt2_gpt2_output.pt
Cache is generated by evaluate_gpt2_detector.ipynb


In [12]:
# This cell reports the GPT2-Output predictions; the banner previously repeated
# the "OpenGPTText-Final" label from an earlier section.
print("GPT2-Detector on GPT2-Output")
report_statistics(*quick_statistics(gpt2_prediction_gpt2))

GPT2-Detector on OpenGPTText-Final
True Positive: 4629 	| True Negative: 4681
False Positive:319 	| False Negative:371
True Positive Rate:  92.58\%
True Negative Rate:  93.62\%
False Positive Rate: 6.38\%
False Negative Rate: 7.42\%
Accuracy: 93.1\%
F1 Score: 0.93
LaTeX Usable-version


93.1\% &92.58\%, (4629) & 93.62\%, (4681) & 6.38\%, (319) & 7.42\%, (371) \\
