In [8]:
import os
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

In [9]:
# Reference: https://platform.openai.com/docs/guides/optimizing-llm-accuracy/llm-optimization-context
# Techniques to consider for improving accuracy:
#   - prompt engineering
#   - zero-shot and few-shot learning
#   - RAG: use retrieval to find similar responses that can serve as few-shot examples

In [10]:
# Load the toxic-classification evaluation set.
df = pd.read_csv('../../data/csv/toxic_classification_evaluation_set.csv')

# Store the ground-truth label as an integer-formatted string so it can later
# be compared directly against the text returned by the model.
df['Toxic'] = [str(np.int64(label)) for label in df['Toxic']]

df

Unnamed: 0,Text,Toxic
0,Phrases like pepper entire article displaying ...,0
1,Ok seriously hell Antlion page vandalism 69138...,0
2,REDIRECT Talk1er Gaou song,0
3,following exchange answered charge admitted pa...,0
4,Go fuck fucking dick go rot hell piece crap mo...,1
...,...,...
995,Fuck Fuck racist basterd,1
996,stated attack Summerluvins talk page vaguely s...,1
997,way doesnt even place XBox Live Marketplace ca...,0
998,bad article behalf wikipadia onesided propagan...,1


In [11]:
# Pull variables from a local .env file into the process environment, then
# read the two API keys from it. A key that is not set resolves to None.
load_dotenv()

FINE_TUNED_TOXIC_DETECTION_API_KEY = os.environ.get("FINE_TUNED_TOXIC_DETECTION_API_KEY")
REGULAR_TOXIC_DETECTION_API_KEY = os.environ.get("REGULAR_TOXIC_DETECTION_API_KEY")

In [12]:
# Client used for the general-purpose (non-fine-tuned) model calls.
general = OpenAI(api_key=REGULAR_TOXIC_DETECTION_API_KEY)

In [13]:
# Empty results table: one row per evaluated text, holding the input text and
# the label returned by the model.
original = pd.DataFrame(
    columns=['Text', 'Toxic'],
)

In [14]:
# gpt-3.5-turbo-0125
# Baseline run: send every evaluation text to the model with the minimal
# system prompt and record the model's reply as the predicted label.

# The system message is identical for every request, so build it once.
system_message = {
    "role": "system",
    "content": "Detect whether either is 1 for toxic or 0 for non-toxic"
}

# Collect replies in a plain list and build the DataFrame once at the end;
# enlarging a DataFrame row by row with .loc reallocates on every insert.
records = []
for text in df['Text']:
    completion = general.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
            system_message,
            {
                "role": "user",
                "content": text
            }
        ]
    )
    reply = completion.choices[0].message.content
    # The API may return no text content (None); store an empty string then.
    # Surrounding whitespace is stripped so a reply such as "1\n" still
    # matches the label "1" in the exact-match comparison done later.
    records.append((text, (reply or "").strip()))

# Reuse df's index so predictions line up row-for-row with the ground truth.
original = pd.DataFrame(records, columns=['Text', 'Toxic'], index=df.index)


In [15]:
# Observation: gpt-3.5 needs a very specific prompt; more instruction is needed.
# gpt-4 returns just the number that was requested, whereas gpt-3.5 tends to
# return a longer free-text answer that was not asked for.
original

Unnamed: 0,Text,Toxic
0,Phrases like pepper entire article displaying ...,The text provided contains both toxic and non-...
1,Ok seriously hell Antlion page vandalism 69138...,The text provided does not seem to be making s...
2,REDIRECT Talk1er Gaou song,"I'm sorry, I'm not able to browse the internet..."
3,following exchange answered charge admitted pa...,"Sorry, I am unable to determine whether this e..."
4,Go fuck fucking dick go rot hell piece crap mo...,Toxic
...,...,...
995,Fuck Fuck racist basterd,1
996,stated attack Summerluvins talk page vaguely s...,This text contains toxic language with insults...
997,way doesnt even place XBox Live Marketplace ca...,0
998,bad article behalf wikipadia onesided propagan...,Toxic


In [16]:
# Row-wise exact string match between the model's reply and the ground-truth
# label; any reply that is not literally the label text counts as a miss.
compare = (original['Toxic'] == df['Toxic'])

# Accuracy = matching rows / total rows.
# NOTE(review): the name says "gptfour" but the run above uses
# gpt-3.5-turbo-0125 — confirm which is intended. The name is kept because
# the reporting cell reads it.
matches = compare.to_numpy().sum()
gptfour_original_accuracy = matches / len(compare)

In [17]:
# Inspect the per-row match mask (True where the reply equals the label).
compare

0      False
1      False
2      False
3      False
4      False
       ...  
995     True
996    False
997     True
998    False
999    False
Name: Toxic, Length: 1000, dtype: bool

In [18]:
# Accuracy log for the prompt variants tried (presumably from earlier runs):
#   4%      (variant not noted; presumably the initial minimal prompt — confirm)
#   70.90%  detailed prompt
#   73.30%  summarized prompt
#   83.40%  summarized prompt with 2-shot learning
#   81.50%  summarized prompt with 4-shot learning
print(f"Accuracy: {gptfour_original_accuracy * 100:.2f}%")

Accuracy: 21.80%
