In [3]:
from pathlib import Path
import pandas as pd
import os, requests
from dotenv import load_dotenv
# Called before any os.getenv(...) lookup below; presumably this is what makes
# ORIGINALITY_API_KEY / GPTZERO_API_KEY from a local .env file visible - confirm.
load_dotenv()

# Read the essay corpus from a path relative to the notebook's working directory.
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, which looks like
# an index that was written into the CSV; index_col=0 would absorb it, but that
# changes the columns exported later - confirm downstream readers first.
dataset = pd.read_csv(Path("./data/dataset.csv"))
# Bare last expression so the notebook renders the frame as the cell output.
dataset

Unnamed: 0,essay_name,essay,words,chars,is_gpt
0,human_1.txt,The protagonist Yossarian in Joseph Heller's C...,916,4451,0
1,human_2.txt,Steven Pinker’s “A History of Violence: Edge M...,577,3049,0
2,human_3.txt,"In the play “Death of a Salesman”, the four ma...",581,2765,0
3,human_4.txt,In the novel A Portrait of the Artist as a You...,420,2099,0
4,human_5.txt,Fashion is a puzzling topic. Fashion appeals t...,377,1959,0
...,...,...,...,...,...
107,gpt_54.txt,"In Jon Krakauer’s novel, Into The Wild, Christ...",416,2300,1
108,gpt_55.txt,"Umberto Eco's essay ""Ur-Fascism"" defines fasci...",125,651,1
109,gpt_56.txt,"In Ray Bradbury's ""The Pedestrian,"" the light ...",115,589,1
110,gpt_57.txt,"In the dimly lit streets of Victorian London, ...",154,608,1


In [4]:
def originality_ai_output(dataset: pd.DataFrame, timeout: float = 60.0) -> pd.DataFrame:
    """Label every essay in ``dataset`` with the Originality.ai scan endpoint.

    One POST request is sent per row, carrying that row's ``essay`` value. From
    each successful (HTTP 200) response the ``score`` object is read and three
    values are recorded under the row's index label:

    * ``originality_score_original`` - ``score["original"]``
    * ``originality_score_ai`` - ``score["ai"]``
    * ``originality_score_overall`` - ``1`` when ``score["original"]`` is
      strictly smaller than ``score["ai"]``, otherwise ``0``

    On the first non-200 response the response text is printed and scanning
    stops; rows that were not scored are left as missing values in the three
    new columns.

    Args:
        dataset: Frame with an ``essay`` column. It is modified in place (the
            three columns above are added or overwritten).
        timeout: Seconds passed to ``requests.post`` so that a stalled
            connection cannot block the notebook forever.

    Returns:
        The same ``dataset`` object, with the three score columns attached.

    Raises:
        Exceptions raised by ``requests.post`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    # label records with originality.ai
    url = "https://api.originality.ai/api/v1/scan/ai"
    originality_api_key = os.getenv("ORIGINALITY_API_KEY")
    # The headers do not depend on the row, so build them once outside the loop.
    headers = {"X-OAI-API-KEY": originality_api_key, "Accept": "application/json"}

    # Results are keyed by the frame's index labels so they can be aligned back
    # onto the frame even when the loop stops early.
    originality_score_original = {}
    originality_score_ai = {}
    originality_score_overall = {}

    for index, row in dataset.iterrows():
        payload = {"content": row["essay"], "aiModelVersion": "1"}

        response = requests.post(url, json=payload, headers=headers, timeout=timeout)

        if response.status_code != 200:
            # Best effort: report the API error and keep what was scored so far.
            print(response.text)
            break

        score = response.json()["score"]
        originality_score_original[index] = score["original"]
        originality_score_ai[index] = score["ai"]
        originality_score_overall[index] = 1 if score["original"] < score["ai"] else 0

    # Assign explicit Series instead of raw dicts: Series assignment aligns on the
    # index labels (unscored rows become missing values) and does not rely on how
    # a particular pandas release treats a dict on the right-hand side.
    dataset["originality_score_ai"] = pd.Series(originality_score_ai)
    dataset["originality_score_original"] = pd.Series(originality_score_original)
    dataset["originality_score_overall"] = pd.Series(originality_score_overall)

    return dataset


# Sends one request per row of `dataset` and rebinds the name to the returned
# frame, which carries the three originality_score_* columns.
dataset = originality_ai_output(dataset)

In [None]:
def gptzero_output(dataset: pd.DataFrame, timeout: float = 60.0) -> pd.DataFrame:
    """Label every essay in ``dataset`` with the GPTZero text prediction endpoint.

    One POST request is sent per row, carrying that row's ``essay`` value as
    ``document``. From each successful (HTTP 200) response the first entry of
    ``documents`` is read and four values are recorded under the row's index
    label:

    * ``gptzero_score_average`` - ``average_generated_prob``
    * ``gptzero_score_ai`` - ``completely_generated_prob``
    * ``gptzero_burstiness`` - ``overall_burstiness``
    * ``gptzero_score_overall`` - ``1`` when ``completely_generated_prob`` is
      greater than ``0.5``, otherwise ``0``

    On the first non-200 response the response text is printed and scanning
    stops; rows that were not scored are left as missing values in the four
    new columns.

    Args:
        dataset: Frame with an ``essay`` column. It is modified in place (the
            four columns above are added or overwritten).
        timeout: Seconds passed to ``requests.post`` so that a stalled
            connection cannot block the notebook forever.

    Returns:
        The same ``dataset`` object, with the four GPTZero columns attached.

    Raises:
        Exceptions raised by ``requests.post`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    # label records with gptzero
    url = "https://api.gptzero.me/v2/predict/text"
    gptzero_api_key = os.getenv("GPTZERO_API_KEY")
    # The headers do not depend on the row, so build them once outside the loop.
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "x-api-key": gptzero_api_key,
    }

    # Results are keyed by the frame's index labels so they can be aligned back
    # onto the frame even when the loop stops early.
    gptzero_score_average = {}
    gptzero_score_ai = {}
    gptzero_burstiness = {}
    gptzero_score_overall = {}

    for index, row in dataset.iterrows():
        payload = {"document": row["essay"]}

        response = requests.post(url, json=payload, headers=headers, timeout=timeout)

        if response.status_code != 200:
            # Best effort: report the API error and keep what was scored so far.
            print(response.text)
            break

        # Decode the body once instead of re-parsing it for every field.
        document = response.json()["documents"][0]
        completely_generated_prob = document["completely_generated_prob"]

        gptzero_score_average[index] = document["average_generated_prob"]
        gptzero_score_ai[index] = completely_generated_prob
        gptzero_burstiness[index] = document["overall_burstiness"]
        gptzero_score_overall[index] = 1 if completely_generated_prob > 0.5 else 0

    # Assign explicit Series instead of raw dicts: Series assignment aligns on the
    # index labels (unscored rows become missing values) and does not rely on how
    # a particular pandas release treats a dict on the right-hand side.
    dataset["gptzero_score_ai"] = pd.Series(gptzero_score_ai)
    dataset["gptzero_score_average"] = pd.Series(gptzero_score_average)
    dataset["gptzero_score_overall"] = pd.Series(gptzero_score_overall)
    dataset["gptzero_burstiness"] = pd.Series(gptzero_burstiness)

    return dataset


# Sends one request per row of `dataset` and rebinds the name to the returned
# frame, which carries the four gptzero_* columns.
dataset = gptzero_output(dataset)

In [None]:
# Strip the raw text and its length columns so only identifiers, labels and
# detector scores are exported. drop(..., errors="ignore") replaces the three
# `del` statements so the cell can be re-run without a KeyError once the
# columns are already gone.
dataset = dataset.drop(columns=["essay", "chars", "words"], errors="ignore")

results_path = Path('./results/results.csv')
# to_csv does not create missing directories, so make sure ./results exists.
results_path.parent.mkdir(parents=True, exist_ok=True)
dataset.to_csv(results_path, index=False)