In [11]:
import os
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

#### Necessary pip installation commands include:
- (`os` is part of the Python standard library and needs no installation)
- pip install openai
- pip install pandas
- pip install numpy
- pip install python-dotenv


#### <b>Note</b>: before running this notebook, create a <b>.env</b> file that defines <code>REGULAR_TOXIC_DETECTION_API_KEY</code> (your OpenAI API key)

In [12]:
# Load variables from the .env file, then look up the OpenAI API key by name.
# The lookup yields None when the variable is not set.
load_dotenv()
REGULAR_TOXIC_DETECTION_API_KEY = os.environ.get("REGULAR_TOXIC_DETECTION_API_KEY")

In [13]:
# OpenAI client used for every request to the unmodified ("general") model,
# authenticated with the key read from the environment
general = OpenAI(api_key=REGULAR_TOXIC_DETECTION_API_KEY)

In [14]:
# Load the labelled evaluation set. Each 'Toxic' label is converted to the
# string form of its integer value so it can later be compared with the
# model's text reply.
df = pd.read_csv('../../data/csv/toxic_classification_evaluation_set.csv')
df['Toxic'] = df['Toxic'].map(lambda label: str(np.int64(label)))
df

Unnamed: 0,Text,Toxic
0,going handle notable alumni like Duffield atte...,0
1,wwwcornsilkscom sockpuppets,0
2,1049 11 Sep 2004 UTC,0
3,Deep web article Thanks message made change th...,0
4,Youre quite coward kutta joking However one th...,1
...,...,...
995,Go kill ashamed Twoofers like scumbags deserve...,1
996,Fuck cluelessbot filthy roodypoo99241217133,1
997,Besides defecate image ex Grave Vince Talabér ...,0
998,Gay Guy know Wuzzup like sadomasochism Gim,1


In [15]:
# Empty frame that will receive one row per evaluated text: the text itself
# and the label returned by the model. Column names mirror the dataset's.
completions = pd.DataFrame(
    columns=["Text", "Toxic"],
)

In [16]:
# processing original GPT-4o model across entire dataset
#
# Each text is sent as its own chat request; the system message asks the model
# to answer 1 (toxic) or 0 (non-toxic). Replies are gathered in a plain list
# and the `completions` frame is built once after the loop, rather than
# enlarging a DataFrame one row at a time.
predicted_labels = []
for index, row in df.iterrows():
    completion = general.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system", 
                "content": "Your job is to detect toxicity from a non-biased viewpoint and label prompted content as either 1 for toxic or 0 for non-toxic"
            },
            {
                "role": "user",
                "content": row['Text']
            }
        ]
    )
    # The reply is later matched against the dataset label by exact string
    # equality, so surrounding whitespace/newlines are trimmed here. A reply
    # with no text content (None) is recorded as an empty string.
    reply = completion.choices[0].message.content
    predicted_labels.append((reply or "").strip())

# Reuse df's index so predictions stay aligned row-for-row with the dataset.
completions = pd.DataFrame(
    {'Text': df['Text'], 'Toxic': predicted_labels},
    index=df.index,
)


In [17]:
# displaying the texts alongside the labels returned by the model
completions

Unnamed: 0,Text,Toxic
0,going handle notable alumni like Duffield atte...,0
1,wwwcornsilkscom sockpuppets,0
2,1049 11 Sep 2004 UTC,0
3,Deep web article Thanks message made change th...,0
4,Youre quite coward kutta joking However one th...,1
...,...,...
995,Go kill ashamed Twoofers like scumbags deserve...,1
996,Fuck cluelessbot filthy roodypoo99241217133,1
997,Besides defecate image ex Grave Vince Talabér ...,0
998,Gay Guy know Wuzzup like sadomasochism Gim,0


In [18]:
# Row-by-row check of the model's label against the dataset's label, followed
# by the fraction of rows where the two agree.
compare = completions['Toxic'] == df['Toxic']
gptfour_original_accuracy = compare.sum() / len(compare)

In [19]:
# per-row agreement between model label and dataset label (True = match)
compare

0       True
1       True
2       True
3       True
4       True
       ...  
995     True
996     True
997     True
998    False
999     True
Name: Toxic, Length: 1000, dtype: bool

In [20]:
# GPT-4o model accuracy with role prompting technique
# (fraction of matching labels, shown as a percentage with two decimals)
print(f"Accuracy: {gptfour_original_accuracy * 100:.2f}%")

Accuracy: 79.90%
