In [4]:
import os
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

In [5]:
# Reference: https://platform.openai.com/docs/guides/optimizing-llm-accuracy/llm-optimization-context
# Techniques to consider:
#   - prompt engineering
#   - zero-shot and few-shot learning
#   - RAG: use retrieval to find similar responses to serve as few-shot examples

In [6]:
# Load the labelled comments, then store the 'Toxic' label as the string
# form of its integer value.
dataset_path = '../data//csv/processed_toxic_classification_dataset_short.csv'
df = pd.read_csv(dataset_path)
df['Toxic'] = df['Toxic'].astype(np.int64).astype(str)
df

Unnamed: 0,Text,Toxic
0,Geez forgetful Weve already discussed Marx ana...,0
1,Carioca RFA Thanks support request adminship f...,0
2,Birthday worries Enjoy ur daytalke,0
3,Pseudoscience category Im assuming article pse...,0
4,phrase exists would provided search engine eve...,0
...,...,...
995,2006 UTC rest us ever gone past 3RR good one V...,0
996,Yay lets Pedantic Semantics dance rolls eyes t...,0
997,supposed Know,0
998,guys really discuss napoleon need get fuckin l...,1


In [7]:
# load_dotenv() runs first so the lookups below see whatever it adds to the
# environment; each key is None when its variable is not set.
load_dotenv()
FINE_TUNED_TOXIC_DETECTION_API_KEY, REGULAR_TOXIC_DETECTION_API_KEY = (
    os.environ.get(variable_name)
    for variable_name in (
        "FINE_TUNED_TOXIC_DETECTION_API_KEY",
        "REGULAR_TOXIC_DETECTION_API_KEY",
    )
)

In [8]:
# OpenAI client authenticated with REGULAR_TOXIC_DETECTION_API_KEY.
general = OpenAI(api_key=REGULAR_TOXIC_DETECTION_API_KEY)

In [9]:
# Empty frame with the columns used to hold each comment and the model's reply for it.
result_columns = ['Text', 'Toxic']
original = pd.DataFrame(columns=result_columns)

In [22]:
# Read the evaluation prompt from the text file, decoding it as UTF-8.
with open("../data/text/detailed_instructions.txt", "r", encoding="utf-8") as prompt_file:
    evaluation_prompt = prompt_file.read()

In [23]:
# Display the loaded prompt to check that the file was read as expected.
evaluation_prompt

'Task: You are given a set of comments from social media. Your goal is to evaluate each comment and classify it as either toxic or non-toxic. A comment is considered toxic if it contains harmful, abusive, offensive, or harassing language. If a comment is toxic, label it with a 1. Otherwise, if it is neutral or polite and does not contain harmful language, label it with a 0 for non-toxic.\n\nDefinition of Toxicity:\nToxic comments (1): These are comments that include language or content that is rude, disrespectful, hateful, harmful, threatening, or harassing. This includes but is not limited to:\n\nProfanity or slurs (racial, gender-based, homophobic, etc.)\nDirect or implied threats of violence or harm\nPersonal attacks, name-calling, or derogatory statements\nEncouragement of harm or violence toward individuals or groups\nLanguage that promotes discrimination, hatred, or exclusion\nInsults or offensive jokes directed at individuals or groups based on their identity, beliefs, or charac

In [25]:
# gpt-3.5-turbo-0125
MODEL_NAME = "gpt-3.5-turbo-0125"


def classify_comment(client, model, system_prompt, text):
    """Send one comment to a chat model and return its reply unchanged.

    Args:
        client: OpenAI client used to send the request.
        model: Name of the chat model to query.
        system_prompt: Instructions sent as the system message.
        text: Comment sent as the user message.

    Returns:
        The message content of the first choice in the completion, exactly
        as returned by the API (no stripping or parsing is applied).
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
    )
    return completion.choices[0].message.content


comment_texts = df['Text']
replies = []
try:
    for comment in comment_texts:
        replies.append(
            classify_comment(general, MODEL_NAME, evaluation_prompt, comment)
        )
finally:
    # Build the results frame once instead of enlarging it row by row with
    # .loc inside the loop. Doing this in `finally` keeps the replies that
    # were already received if a request fails part-way through.
    answered = comment_texts.iloc[:len(replies)]
    original = pd.DataFrame({
        'Text': answered,
        # dtype=object keeps the column well-defined even when no reply was collected.
        'Toxic': pd.Series(replies, index=answered.index, dtype=object),
    })


In [26]:
# Observation: gpt-3.5 needs the prompt to be really specific; more instruction is needed.
# gpt-4 replies with just a number, as requested, whereas gpt-3.5 may return
# a longer string of text that was not asked for.
original

Unnamed: 0,Text,Toxic
0,Geez forgetful Weve already discussed Marx ana...,0
1,Carioca RFA Thanks support request adminship f...,0
2,Birthday worries Enjoy ur daytalke,0
3,Pseudoscience category Im assuming article pse...,0
4,phrase exists would provided search engine eve...,0
...,...,...
995,2006 UTC rest us ever gone past 3RR good one V...,0
996,Yay lets Pedantic Semantics dance rolls eyes t...,0
997,supposed Know,0\n\nReason: The comment is unclear and doesn'...
998,guys really discuss napoleon need get fuckin l...,1


In [27]:
# Element-wise exact match between the model's reply and the true label string;
# a reply that is not exactly equal to the label counts as a miss.
# NOTE(review): the name says "gptfour" but the classification loop queries
# gpt-3.5-turbo-0125 — confirm and rename together with any later uses.
compare = original['Toxic'] == df['Toxic']
matched_rows = compare.sum()
gptfour_original_accuracy = matched_rows / len(compare)

In [28]:
# Per-row match flags: True where the model's reply equals the label exactly.
compare

0       True
1       True
2       True
3       True
4       True
       ...  
995     True
996     True
997    False
998     True
999    False
Name: Toxic, Length: 1000, dtype: bool

In [29]:
# Accuracy figures noted by hand (presumably from separate runs or models — TODO label each):
# 4%
# 70.90%
# ???
print(f"Accuracy: {gptfour_original_accuracy * 100:.2f}%")

Accuracy: 70.90%
