In [1]:
import time
import requests
import numpy as np
import os
import json
import plotly.express as px
import pandas as pd
pd.set_option('display.max_colwidth', 400)

from DetectGPT.model import GPT2PPLV2 as GPT2PPL
from GLTR.api import LM as GLTR
from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer, pipeline
from transformers import logging
logging.set_verbosity_error()

import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

C:\Users\daire\anaconda3\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
C:\Users\daire\anaconda3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll


Found API <class 'GLTR.api.LM'> with name gpt-2-small


In [2]:
# 1 indicated AI produced, 0 indicates human produced

In [3]:
def readJSON(location):
    with open(location, encoding = "utf-8") as outfile:
        data = json.load(outfile)
    return pd.DataFrame(data)

In [4]:
def getRobertaScore(text):
    """
    https://huggingface.co/roberta-base-openai-detector
    """
    model = pipeline("text-classification", model="roberta-base-openai-detector")
    vals = model(text, top_k=3)
    flag = sorted(vals, key = lambda x: x["score"])[-1]["label"]
    flag = 1 if flag == "Fake" else 0
    
    return {**{f"roberta_score_{j['label']}": j["score"] for j in vals}, **{"roberta_Label": flag}}
        

In [5]:
def getRobertaLargeScore(text):
    """
    https://huggingface.co/roberta-large-openai-detector
    """
    model = pipeline("text-classification", model="roberta-large-openai-detector")
    vals = model(text, top_k=3)
    flag = sorted(vals, key = lambda x: x["score"])[-1]["label"]
    flag = 1 if flag == "LABEL_0" else 0
    
    d = {"LABEL_1":"0", "LABEL_0":1}
    
    return {**{f"roberta_large_score_{d[j['label']]}": j["score"] for j in vals}, **{"roberta_large_Label": flag}}


In [6]:
def getDetectGPTScore(text):
    """
    https://www.arxiv-vanity.com/papers/2301.11305/
    Slow though...
    """
    model = GPT2PPL()
    vals = model(text, len(text), "v1.1")
    flag = 1 - vals[0]
    
    return {"gpt_detect_mean_score": vals[2], 
            "gpt_detect_mean_probability": vals[3], 
            "gpt_detect_Label": flag}

In [7]:
def getGPTZeroScore(text):
    """
    https://gptzero.me/
    """
    model = GPT2PPL()
    vals = model(text, None, "v1")
    flag = 1 - vals[0]["label"]
    
    return {"gpt_zero_Perplexity": vals[0]["Perplexity"], 
            "gpt_zero_Burtiness": vals[0]["Burstiness"], 
            "gpt_zero_Label": flag}

In [8]:
def getGLTRScore(text, threshold = 0.7):
    """
    http://gltr.io/
    """
    gltr = GLTR()
    def f(x):
        return int(np.where(
            x<10, 0, np.where(x<100, 1, np.where(x<1000, 2, 3))))

    def p(x, vals):
        return sum(np.array(vals) == x)/len(vals)

    valsALL = gltr.check_probabilities(text)
    vals = [f(i[0]) for i in valsALL["real_topk"]]
    
    flag = 1 if p(0, vals) > threshold else 0 
    
    return {"gltr_0": p(0, vals), 
            "gltr_1": p(1, vals),
            "gltr_2": p(2, vals),
            "gltr_3": p(3, vals),
            "gltr_Label": flag}

In [9]:
# %%time
# roberta_dict = getRobertaScore(textSub)
# roberta_large_dict = getRobertaLargeScore(textSub)
# gpt_zero_dict = getGPTZeroScore(textSub)
# gpt_detect_dict = getDetectGPTScore(textSub)
# gltr_dict = getGLTRScore(textSub)

# {**roberta_dict, **gpt_zero_dict, **gpt_detect_dict, **roberta_large_dict, **gltr_dict}

In [10]:
def isAIGenerated(text, chunksize = 300, step_size = 300):
    """
    Detect if a function is AI generated or not.
    This will return several scores, which are
        * GPTZero - https://gptzero.me/
        * Roberta - https://huggingface.co/roberta-base-openai-detector
        * Roberta Large - https://huggingface.co/roberta-large-openai-detector
        * Potentially GLTR in future... - http://gltr.io/dist/index.html
    
    ::param text: (str)
    ::param chunksize: (int)
    ::param step_size: (int)
    
    ::return: (dict[str: float])
    """
    data = pd.DataFrame()
    savedText = []
    textSplit = text.split()
    try:
        for i in range(np.max(1, len(textSplit)//step_size)):
            textSub = " ".join(textSplit[step_size*i:(step_size*i) + chunksize])

            roberta_dict = getRobertaScore(textSub)
            roberta_large_dict = getRobertaLargeScore(textSub)
            gpt_zero_dict = getGPTZeroScore(textSub)
            gpt_detect_dict = {}#getDetectGPTScore(textSub)
            gltr_dict = getGLTRScore(textSub)

            data = data.append({**roberta_dict, **gpt_zero_dict, **gpt_detect_dict, **roberta_large_dict, **gltr_dict}, ignore_index=True)
            savedText += [textSub]
        data["Text"] = savedText
        return data
    except:
        return pd.DataFrame()

In [11]:
# import time

# textSub = """
# In the year 2050, artificial intelligence has transformed every aspect of human life. From self-driving cars to intelligent personal assistants, AI has become an indispensable part of our daily routine. People now live in smart homes where AI-powered systems control the temperature, lighting, and security with perfect precision.
# In the field of medicine, AI has revolutionized healthcare. Advanced algorithms analyze vast amounts of medical data to diagnose diseases at an early stage and recommend personalized treatments. Surgeries are performed with the assistance of surgical robots, ensuring unparalleled precision and minimizing human error.
# Education has also undergone a significant transformation. AI tutors provide personalized learning experiences, adapting to each student's unique needs and learning style. Virtual reality simulations create immersive environments for students to explore various subjects, making education more engaging and interactive than ever before.
# AI has even extended its reach to the creative realm. AI-generated artwork, music, and literature have gained recognition and appreciation among audiences worldwide. Machines have become proficient in composing symphonies, painting masterpieces, and crafting compelling stories that evoke emotions and captivate the imagination.
# While AI has brought numerous benefits, it has also raised ethical concerns. The increasing reliance on AI has led to discussions about job displacement and the potential loss of human touch in various industries. Striking the right balance between automation and human involvement remains a critical challenge for society.
# As AI continues to advance, the possibilities seem limitless. The future holds promises of further breakthroughs in areas such as quantum computing, deep learning, and neural interfaces. It is an exciting time to witness the ever-evolving landscape of artificial intelligence and its impact on shaping our future.
# """

# print("RoBERTa")
# st = time.time()
# roberta_dict = getRobertaScore(textSub)
# print(f"Took {round(time.time() - st, 2)} seconds")
# print("RoBERTa Large")
# st = time.time()
# roberta_large_dict = getRobertaLargeScore(textSub)
# print(f"Took {round(time.time() - st, 2)} seconds")
# print("GPTZero")
# st = time.time()
# gpt_zero_dict = getGPTZeroScore(textSub)
# print(f"Took {round(time.time() - st, 2)} seconds")
# print("DetectGPT")
# st = time.time()
# gpt_detect_dict = getDetectGPTScore(textSub)
# print(f"Took {round(time.time() - st, 2)} seconds")
# print("GLTR")
# st = time.time()
# gltr_dict = getGLTRScore(textSub)
# print(f"Took {round(time.time() - st, 2)} seconds")

# {**roberta_dict, **roberta_large_dict, **gpt_zero_dict, **gpt_detect_dict, **gltr_dict}

In [12]:
validation = readJSON("validation.json")
validation["id"] = range(len(validation))
validation["model"].value_counts()

Human    306
GPT4     236
GPT3     207
Name: model, dtype: int64

In [13]:
validation["length"] = validation["text"].apply(lambda x: len(x))
validation["length"].min(), validation["length"].max()

(333, 5432)

In [14]:
%%time
allData, metaData = pd.DataFrame(), pd.DataFrame()

print(f"Length of datframe: {len(validation)}")
for num, i in enumerate(validation.iterrows()):
    st = time.time()
    text = i[1]["text"]
    id_ = i[1]["id"]
    model = i[1]["model"]

    values = isAIGenerated(text)
    metrics = values.describe().reset_index()
    
    values["id"] = id_
    metrics["id"] = id_

    values["model"] = id_
    metrics["model"] = id_

    allData = pd.concat([allData, metrics])
    metaData = pd.concat([metaData, values])
    print(f"{num}/{len(validation)} --- Text Size: {len(text.split())} --- Took {round(time.time() - st, 2)} seconds.")

allData.to_csv("MetricsValidation.csv")
metaData.to_excel("MetaValidation.xlsx")

Length of datframe: 749
0/749 --- Text Size: 84 --- Took 14.42 seconds.
1/749 --- Text Size: 83 --- Took 13.05 seconds.
2/749 --- Text Size: 124 --- Took 13.83 seconds.
3/749 --- Text Size: 83 --- Took 12.97 seconds.
4/749 --- Text Size: 84 --- Took 12.67 seconds.
5/749 --- Text Size: 90 --- Took 13.06 seconds.
6/749 --- Text Size: 134 --- Took 13.82 seconds.
7/749 --- Text Size: 98 --- Took 12.95 seconds.
8/749 --- Text Size: 94 --- Took 12.87 seconds.
9/749 --- Text Size: 92 --- Took 12.71 seconds.
10/749 --- Text Size: 83 --- Took 12.63 seconds.
11/749 --- Text Size: 99 --- Took 12.96 seconds.
12/749 --- Text Size: 78 --- Took 12.57 seconds.
13/749 --- Text Size: 81 --- Took 12.68 seconds.
14/749 --- Text Size: 73 --- Took 12.67 seconds.
15/749 --- Text Size: 91 --- Took 12.9 seconds.
16/749 --- Text Size: 103 --- Took 13.06 seconds.
17/749 --- Text Size: 75 --- Took 12.6 seconds.
18/749 --- Text Size: 71 --- Took 12.46 seconds.
19/749 --- Text Size: 91 --- Took 12.76 seconds.
20/74

AxisError: axis 1 is out of bounds for array of dimension 0

In [15]:
# metaData = pd.read_excel("MetaValidation.xlsx")

In [16]:
d = validation.merge(metaData.groupby("id").mean().reset_index(), on = "id")[["kind", "gpt_zero_Label", "gltr_Label", "roberta_Label", "roberta_large_Label"]]
d["Label"] = d["kind"].map({"Human-Written":0}).fillna(1)

In [17]:
d[["Label", "gpt_zero_Label", "gltr_Label", "roberta_Label", "roberta_large_Label"]]

Unnamed: 0,Label,gpt_zero_Label,gltr_Label,roberta_Label,roberta_large_Label
0,1.0,1.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.0,0.0
3,1.0,1.0,1.0,0.0,0.0
4,1.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...
112,0.0,1.0,0.0,0.0,0.0
113,0.0,1.0,0.0,0.0,0.0
114,0.0,0.0,0.0,0.0,0.0
115,0.0,0.0,0.0,0.0,0.0


In [18]:
from sklearn.metrics import classification_report

In [19]:
d.groupby("Label").mean()

Unnamed: 0_level_0,gpt_zero_Label,gltr_Label,roberta_Label,roberta_large_Label
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,0.307692,0.076923,0.0,0.0
1.0,0.813187,0.307692,0.043956,0.010989


In [20]:
len(d)

117

In [21]:
for LABEL in ["gpt_zero_Label", "gltr_Label","roberta_Label","roberta_large_Label"]:
    print(LABEL)
    print(classification_report(d["Label"].astype(bool).values, d[LABEL].astype(bool).values))
    print("")

gpt_zero_Label
              precision    recall  f1-score   support

       False       0.51      0.69      0.59        26
        True       0.90      0.81      0.86        91

    accuracy                           0.79       117
   macro avg       0.71      0.75      0.72       117
weighted avg       0.82      0.79      0.80       117


gltr_Label
              precision    recall  f1-score   support

       False       0.28      0.92      0.42        26
        True       0.93      0.31      0.46        91

    accuracy                           0.44       117
   macro avg       0.60      0.62      0.44       117
weighted avg       0.79      0.44      0.45       117


roberta_Label
              precision    recall  f1-score   support

       False       0.23      1.00      0.37        26
        True       1.00      0.04      0.08        91

    accuracy                           0.26       117
   macro avg       0.62      0.52      0.23       117
weighted avg       0.83      0.2