In [1]:
import os
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

In [2]:
# Fine tuning increases the accuracy from 84% to 84.20%; minor change

In [3]:
df = pd.read_csv('../data/csv/processed_toxic_classification_dataset_short.csv')
df['Toxic'] = df['Toxic'].apply(lambda x: str(int(x)))
df

Unnamed: 0,Text,Toxic
0,Geez forgetful Weve already discussed Marx ana...,0
1,Carioca RFA Thanks support request adminship f...,0
2,Birthday worries Enjoy ur daytalke,0
3,Pseudoscience category Im assuming article pse...,0
4,phrase exists would provided search engine eve...,0
...,...,...
995,2006 UTC rest us ever gone past 3RR good one V...,0
996,Yay lets Pedantic Semantics dance rolls eyes t...,0
997,supposed Know,0
998,guys really discuss napoleon need get fuckin l...,1


In [29]:
load_dotenv()
FINE_TUNED_TOXIC_DETECTION_API_KEY = os.getenv("FINE_TUNED_TOXIC_DETECTION_API_KEY")

In [30]:
fine_tuned = OpenAI(
    api_key=FINE_TUNED_TOXIC_DETECTION_API_KEY
)

In [31]:
tuned = pd.DataFrame(columns=['Text', 'Toxic'])

In [32]:
evaluation_prompt = ''
# Read the evaluation prompt from the text file with utf-8 encoding
with open("../data/text/summarized_instructions.txt", "r", encoding="utf-8") as file:
    evaluation_prompt += file.read()

In [33]:
evaluation_prompt

'You are tasked with classifying comments as either toxic or non-toxic from a non-biased viewpoint. \nA comment is toxic if it contains harmful, abusive, or offensive language, such as threats, hate speech, \npersonal attacks, obscene language, or discrimination. Label toxic comments with a 1 and non-toxic comments with a 0. \n\nMild criticism or disagreement without harmful language should be labeled as non-toxic (0). \nUse these guidelines:\n- Label toxic (1) if: profanity, slurs, threats, or discrimination is present.\n- Label non-toxic (0) if: the comment is neutral, polite, or constructive, even if it expresses disagreement.\n\nExamples for Reference:\n\nText: “I disagree with you, but I think you make some valid points.”\n1\n\nText: “Go back to where you came from, we don’t want you here.”\n0'

In [41]:
for index, row in df.iterrows():
    completion = fine_tuned.chat.completions.create(
        model="ft:gpt-4o-2024-08-06:personal::AENT6Q3Y",
        messages=[
            {
                "role": "system", 
                "content": evaluation_prompt
            },
            {
                "role": "user",
                "content": row['Text']
            }
        ]
    )
    tuned.loc[index] = [row['Text'], completion.choices[0].message.content]


In [48]:
tuned

Unnamed: 0,Text,Toxic
0,Geez forgetful Weve already discussed Marx ana...,0
1,Carioca RFA Thanks support request adminship f...,0
2,Birthday worries Enjoy ur daytalke,0
3,Pseudoscience category Im assuming article pse...,0
4,phrase exists would provided search engine eve...,0
...,...,...
995,2006 UTC rest us ever gone past 3RR good one V...,0
996,Yay lets Pedantic Semantics dance rolls eyes t...,0
997,supposed Know,0
998,guys really discuss napoleon need get fuckin l...,1


In [49]:
compare = tuned['Toxic'] == df['Toxic']
gptthreefive_original_accuracy = compare.values.sum() / compare.size

In [50]:
compare

0      True
1      True
2      True
3      True
4      True
       ... 
995    True
996    True
997    True
998    True
999    True
Name: Toxic, Length: 1000, dtype: bool

In [51]:
# GPT-4o: 97.30% with too mid-detailed evaluation prompt
# GPT-3.5 turbo 50 set: 91.80% with summarized two-shot prompt costing 73 cents
# GPT-3.5 turbo 200 set: 92.00% with summarized two-shot prompt costing 72 cents
# GPT 3.5 turbo 2000 set: 97:30% with summarized two-shot prompt costing 72 cents
print(f"Accuracy: {gptthreefive_original_accuracy * 100:.2f}%")

Accuracy: 97.20%


In [46]:

# completion = fine_tuned.chat.completions.create(
#     model="ft:gpt-4o-2024-08-06:personal::AENT6Q3Y",
#     messages=[
#         {
#             "role": "system", 
#             "content": "The user has a toxicity threshold of 1, meaning that they find 90 to 100 percent of comments non-toxic and have extremely high tolerance. Label this text as either 0 for non-toxic or 1 for toxic."
#         },
#         {
#             "role": "user",
#             "content": df['Text'][998]
#         }
#     ]
# )

In [47]:
completion.choices[0].message.content

'0'