In [19]:
import os
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

In [20]:
# Fine tuning increases the accuracy from 84% to 84.20%; minor change

In [21]:
df = pd.read_csv('../data/processed_toxic_classification_dataset_short.csv')
df['Toxic'] = df['Toxic'].apply(lambda x: str(np.int64(0)))
df

Unnamed: 0,Text,Toxic
0,Geez forgetful Weve already discussed Marx ana...,0
1,Carioca RFA Thanks support request adminship f...,0
2,Birthday worries Enjoy ur daytalke,0
3,Pseudoscience category Im assuming article pse...,0
4,phrase exists would provided search engine eve...,0
...,...,...
995,2006 UTC rest us ever gone past 3RR good one V...,0
996,Yay lets Pedantic Semantics dance rolls eyes t...,0
997,supposed Know,0
998,guys really discuss napoleon need get fuckin l...,0


In [22]:
load_dotenv()
FINE_TUNED_TOXIC_DETECTION_API_KEY = os.getenv("FINE_TUNED_TOXIC_DETECTION_API_KEY")

In [23]:
fine_tuned = OpenAI(
    api_key=FINE_TUNED_TOXIC_DETECTION_API_KEY
)

In [24]:
tuned = pd.DataFrame(columns=['Text', 'Toxic'])

In [39]:
evaluation_prompt = ''
# Read the evaluation prompt from the text file with utf-8 encoding
with open("../text/summarized_instructions.txt", "r", encoding="utf-8") as file:
    evaluation_prompt += file.read()

In [40]:
evaluation_prompt

'You are tasked with classifying comments as either toxic or non-toxic from a non-biased viewpoint. \nA comment is toxic if it contains harmful, abusive, or offensive language, such as threats, hate speech, \npersonal attacks, or discrimination. Label toxic comments with a 1 and non-toxic comments with a 0. \n\nMild criticism or disagreement without harmful language should be labeled as non-toxic (0). \nUse these guidelines:\n- Label toxic (1) if: profanity, slurs, threats, or discrimination is present.\n- Label non-toxic (0) if: the comment is neutral, polite, or constructive, even if it expresses disagreement.\n'

In [57]:
for index, row in df.iterrows():
    completion = fine_tuned.chat.completions.create(
        model="ft:gpt-4o-2024-08-06:personal::AENT6Q3Y",
        messages=[
            {
                "role": "system", 
                "content": "Your job is to detect toxicity from a non-biased viewpoint and label prompted content. ONLY GIVE THE VALUE 1 FOR TOXIC OR 0 FOR NON-TOXIC"
            },
            {
                "role": "user",
                "content": row['Text']
            }
        ]
    )
    tuned.loc[index] = [row['Text'], completion.choices[0].message.content]


In [58]:
tuned

Unnamed: 0,Text,Toxic
0,Geez forgetful Weve already discussed Marx ana...,0
1,Carioca RFA Thanks support request adminship f...,0
2,Birthday worries Enjoy ur daytalke,0
3,Pseudoscience category Im assuming article pse...,0
4,phrase exists would provided search engine eve...,0
...,...,...
995,2006 UTC rest us ever gone past 3RR good one V...,0
996,Yay lets Pedantic Semantics dance rolls eyes t...,0
997,supposed Know,0
998,guys really discuss napoleon need get fuckin l...,1


In [59]:
compare = tuned['Toxic'] == df['Toxic']
gptfour_original_accuracy = compare.values.sum() / compare.size

In [60]:
compare

0       True
1       True
2       True
3       True
4       True
       ...  
995     True
996     True
997     True
998    False
999     True
Name: Toxic, Length: 1000, dtype: bool

In [61]:
df['Text'][998]

'guys really discuss napoleon need get fuckin life sad would rather discuss someone dead years look pornYOU GUYS R HOMOSEXUALS LIKE ANAL SEX TYPES'

In [62]:
# 92.10% with undetailed evaluation prompt
# 90.80% with too detailed evaluation prompt
print(f"Accuracy: {gptfour_original_accuracy * 100:.2f}%")

Accuracy: 91.50%
