In [7]:
import os
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

#### Necessary pip installation commands include:
- pip install openai
- pip install pandas
- pip install numpy
- pip install python-dotenv

`os` is part of the Python standard library and does not need to be installed with pip.


#### <b>Note</b>: a <b>.env</b> file that defines <b>REGULAR_TOXIC_DETECTION_API_KEY</b> must already exist so the API key can be loaded

In [8]:
# Load the variables declared in the local .env file into the process
# environment, then read the API key from there (None when it is not set).
load_dotenv()
REGULAR_TOXIC_DETECTION_API_KEY = os.environ.get("REGULAR_TOXIC_DETECTION_API_KEY")

In [9]:
# Create the OpenAI client, authenticated with the key read from the
# environment; it is used for the chat completion requests further down.
general = OpenAI(api_key=REGULAR_TOXIC_DETECTION_API_KEY)

In [10]:
# Read the evaluation set and convert the ground-truth labels to integer
# strings (e.g. "0" / "1") so they can later be compared with the model's
# text replies.
df = pd.read_csv('../../data/csv/toxic_classification_evaluation_set.csv')
df['Toxic'] = df['Toxic'].map(lambda label: str(np.int64(label)))
df

Unnamed: 0,Text,Toxic
0,Phrases like pepper entire article displaying ...,0
1,Ok seriously hell Antlion page vandalism 69138...,0
2,REDIRECT Talk1er Gaou song,0
3,following exchange answered charge admitted pa...,0
4,Go fuck fucking dick go rot hell piece crap mo...,1
...,...,...
995,Fuck Fuck racist basterd,1
996,stated attack Summerluvins talk page vaguely s...,1
997,way doesnt even place XBox Live Marketplace ca...,0
998,bad article behalf wikipadia onesided propagan...,1


In [11]:
# Empty frame that will receive one row per evaluated text: the input text
# and the label returned by the model.
completions = pd.DataFrame(data=None, columns=['Text', 'Toxic'])

In [12]:
# processing original GPT-4o model across entire dataset
#
# Each row's text is sent as the user message together with a fixed system
# prompt, and the model's reply is recorded as the predicted label.
#
# Replies are gathered in a dict keyed by the row's index label and turned
# into the `completions` frame in one step, rather than enlarging the frame
# with one `.loc` assignment per row.
predictions = {}
try:
    for index, row in df.iterrows():
        completion = general.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system", 
                    "content": "Detect whether either is 1 for toxic or 0 for non-toxic"
                },
                {
                    "role": "user",
                    "content": row['Text']
                }
            ]
        )
        reply = completion.choices[0].message.content
        # The reply text may be None or carry surrounding whitespace; normalise
        # it so that e.g. "1\n" is not scored as a mismatch against "1".
        predictions[index] = (row['Text'], (reply or "").strip())
finally:
    # Built in `finally` so that replies already received are kept in
    # `completions` even if a request raises part-way through the dataset.
    completions = pd.DataFrame.from_dict(
        predictions, orient='index', columns=['Text', 'Toxic']
    )


In [13]:
# displaying the texts alongside the labels returned by the model
completions

Unnamed: 0,Text,Toxic
0,Phrases like pepper entire article displaying ...,0
1,Ok seriously hell Antlion page vandalism 69138...,1
2,REDIRECT Talk1er Gaou song,0
3,following exchange answered charge admitted pa...,0
4,Go fuck fucking dick go rot hell piece crap mo...,1
...,...,...
995,Fuck Fuck racist basterd,1
996,stated attack Summerluvins talk page vaguely s...,1
997,way doesnt even place XBox Live Marketplace ca...,0
998,bad article behalf wikipadia onesided propagan...,1


In [14]:
# Row-by-row match between the model's label and the reference label; the
# fraction of matching rows is the model's accuracy on this dataset.
compare = completions['Toxic'] == df['Toxic']
gptfour_original_accuracy = compare.to_numpy().sum() / len(compare)

In [15]:
# displaying the per-row result: True where the model's label equals the dataset label
compare

0       True
1      False
2       True
3       True
4       True
       ...  
995     True
996     True
997     True
998     True
999     True
Name: Toxic, Length: 1000, dtype: bool

In [16]:
# Report the GPT-4o accuracy as a percentage with two decimal places
print("Accuracy: {:.2f}%".format(gptfour_original_accuracy * 100))

Accuracy: 82.00%
