In [1]:
import os
import openai
import pandas as pd
import tiktoken
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI
from langchain.schema import Document
from langchain_pinecone import PineconeVectorStore
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import Pinecone

#### Necessary pip installation commands include:
- pip install os
- pip install openai
- pip install pandas
- pip install tiktoken
- pip install numpy
- pip install python-dotenv
- pip install langchain
- pip install langchain-pinecone
- pip install langchain-core
- pip install langchain-openai

#### <b>Note</b>: have a <b>.env</b> file already created for accessing API key

In [2]:
# retrieving API Key from OpenAI platform
load_dotenv()
FINE_TUNED_TOXIC_DETECTION_API_KEY = os.getenv('FINE_TUNED_TOXIC_DETECTION_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pc_index = os.getenv('PINECONE_GPT')

In [3]:
# initializing Pinecone vector database instance
docSearch = Pinecone(
    index_name=pc_index,
    embedding=OpenAIEmbeddings()
)

  docSearch = Pinecone(


In [4]:
# instantiating client with API key
fine_tuned = OpenAI(
    api_key=FINE_TUNED_TOXIC_DETECTION_API_KEY
)

In [7]:
# retrieving dataset utilized for evaluation
df = pd.read_csv('../../data/csv/toxic_classification_evaluation_set.csv')
df['Toxic'] = df['Toxic'].apply(lambda x: str(int(x)))
df

Unnamed: 0,Text,Toxic
0,largely written head problem put thoughts scre...,0
1,unable answer question7713019517,0
2,According edits add ratings based brothers poi...,0
3,take back Watch back u gwenrol whatever europe...,0
4,Take care believe bullshit see Bullshit 991842...,1
...,...,...
995,well know fuck idea notability fuck line make ...,1
996,mane fuck souja boi nigga dont go hard n paint,1
997,certainly better world service called simple N...,0
998,please interact leave warnings lie talk page c...,1


In [8]:
# retrieving dataset utilized for evaluation
tuned = pd.DataFrame(columns=['Text', 'Toxic'])

In [9]:
evaluation_prompt = ''
# Read the evaluation prompt from the text file with utf-8 encoding
with open("../../data/text/fine_tuned_prompts/gpt-4-two-shot-prompt.txt", "r", encoding="utf-8") as file:
    evaluation_prompt += file.read()

In [10]:
evaluation_prompt

'Classify these comments as either toxic (1) non-toxic (0). Toxic comments contain offensive, harmful, or abusive language. Label 1 for toxic and 0 for non-toxic\n\nExamples for Reference:\n\nText: “I disagree with your point, but I respect your view.”\n\nLabel: 0\n\nText: “Go back to where you came from.”\n\nLabel: 1\n\nClassify the following comment:'

In [11]:
# processing fine-tuned GPT-4o model across entire dataset with advanced prompt
for index, row in df.iterrows():
    text = row['Text']
    completion = fine_tuned.chat.completions.create(
        model="ft:gpt-4o-2024-08-06:personal::ASwKLqOH",
        messages=[
            {
                "role": "system", 
                "content": evaluation_prompt
            },
            {
                "role": "user",
                "content": text
            }
        ],
        max_tokens=10
    )
    tuned.loc[index] = [row['Text'], completion.choices[0].message.content]


In [12]:
tuned

Unnamed: 0,Text,Toxic
0,largely written head problem put thoughts scre...,0
1,unable answer question7713019517,0
2,According edits add ratings based brothers poi...,0
3,take back Watch back u gwenrol whatever europe...,1
4,Take care believe bullshit see Bullshit 991842...,1
...,...,...
995,well know fuck idea notability fuck line make ...,1
996,mane fuck souja boi nigga dont go hard n paint,1
997,certainly better world service called simple N...,0
998,please interact leave warnings lie talk page c...,0


In [13]:
# comparing results of model to dataset
compare = tuned['Toxic'] == df['Toxic']
gptfour_finetuned_accuracy = compare.values.sum() / compare.size

In [14]:
compare

0       True
1       True
2       True
3      False
4       True
       ...  
995     True
996     True
997     True
998    False
999     True
Name: Toxic, Length: 1000, dtype: bool

In [15]:
# fine-tuned GPT-4o model accuracy with advanced prompted engineering (role prompting, two-shot examples)
print(f"Accuracy: {gptfour_finetuned_accuracy * 100:.2f}%")

Accuracy: 94.70%
