In [1]:
import os
import re
import openai
import pandas as pd
import tiktoken
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI
from langchain.schema import Document
from langchain_pinecone import PineconeVectorStore
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import Pinecone

#### Necessary pip installation commands include:
- pip install openai
- pip install pandas
- pip install tiktoken
- pip install numpy
- pip install python-dotenv
- pip install langchain
- pip install langchain-pinecone
- pip install langchain-core
- pip install langchain-openai

#### <b>Note</b>: have a <b>.env</b> file already created for accessing API key

In [2]:
# retrieving API Key from OpenAI platform
load_dotenv()
FINE_TUNED_TOXIC_DETECTION_API_KEY = os.getenv('FINE_TUNED_TOXIC_DETECTION_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pc_index = os.getenv('PINECONE_GPT')

In [3]:
# initializing Pinecone vector database instance
docSearch = Pinecone(
    index_name=pc_index,
    embedding=OpenAIEmbeddings()
)
# instantiating client with API key
fine_tuned = OpenAI(
    api_key=FINE_TUNED_TOXIC_DETECTION_API_KEY
)

  docSearch = Pinecone(


In [4]:
def remove_repetition(text, depth=0):
    # cap = 5
    # check = 0
    # split_line = line.split(' ')
    # new_split = []
    # curr = split_line[0]
    
    # for word in split_line:
    #     if word == curr:
    #         check = check + 1
    #     else:
    #         curr = word
    #         check = 0
            
    #     if check < cap:
    #         new_split.append(word)

    pattern = r'\b(\w+(?:\s+\w+)*)\b(?:\s+\1\b)+'
    new_text = re.sub(pattern, r'\1', text, flags=re.IGNORECASE)
    
    if new_text == text or depth > 10: 
        return new_text
    return remove_repetition(new_text, depth + 1)
    
    # return " ".join(new_split)

In [5]:
# appends one/few shot examples to evaluation prompt
def shot_additions(examples):
   evaluation_prompt = ''
   # Read the evaluation prompt from the text file with utf-8 encoding
   with open("../../data/text/rag_prompts/gpt-4-two-shot-prompt.txt", "r", encoding="utf-8") as file:
        evaluation_prompt += file.read() 
   count = 1
   for example in examples:
       # separates example key and values
       split_ex = example.split(' - ')
       comment = split_ex[0]
       label = split_ex[1]
       print(f'#{count} shot: {comment}')
       evaluation_prompt += '\n\nText: \"' + remove_repetition(comment) + "\"" + '\n\n' + label
       count += 1
   print(f'Shots added!')
   evaluation_prompt += '\n\nClassify the following comment:'
   return evaluation_prompt

In [6]:
# queries vector database for custom,
# with similar examples to user prompt
def rag_prompt(text):
    two_shots = []
    # queries Pinecone database
    search_results = docSearch.max_marginal_relevance_search(text, k=25, fetch_k=50)
    print(f'Search Results Length: {len(search_results)}')
    for i in range(len(search_results)):
        content = search_results[i].page_content
        # prevents repetition which will cause errors within OpenAI
        if i != 250:
            two_shots.append(content)
        # two valid examples found
        if len(two_shots) == 2:
            print(f'Two Shots Found!')
            break
    return shot_additions(two_shots)

In [7]:
# tests accuracy of chosen model against unique prompt and data
def accuracy_testing(data, evaluation_prompt, model, use_rag = False):
    # chooses columns of focus
    tuned = pd.DataFrame(columns=['Text', 'Toxic'])
    for index, row in data.iterrows():
        text = row['Text']
        print(f"Index: {index}")
        print(f'Text: {text}')
        completion = fine_tuned.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system", 
                    "content": evaluation_prompt if use_rag == False else rag_prompt(remove_repetition(text))
                },
                {
                    "role": "user",
                    "content": text
                }
            ]
        )
        tuned.loc[index] = [text, completion.choices[0].message.content]
    # comparing results of model to dataset
    compare = tuned['Toxic'] == data['Toxic']
    accuracy = compare.values.sum() / compare.size
    return accuracy

In [8]:
sugarai = pd.read_csv('../../data/csv/sugar_ai_toxicity_evaluation_set.csv')
unintended_bias = pd.read_csv('../../data/csv/unintended_bias_toxicity_classification_set.csv')
sugarai['Toxic'] = sugarai['Toxic'].apply(lambda x: str(int(x)))
unintended_bias['Toxic'] = unintended_bias['Toxic'].apply(lambda x: str(int(x)))

In [10]:
evaluation_prompt = ''
# Read the evaluation prompt from the text file with utf-8 encoding
with open("../../data/text/fine_tuned_prompts/gpt-3-5-1106-two-shot-prompt.txt", "r", encoding="utf-8") as file:
    evaluation_prompt += file.read()

In [10]:
# testing accuracy of Sugar AI Toxicity Classification dataset on original model
sugarai_original_test_accuracy = accuracy_testing(sugarai, "Detect whether either is 1 for toxic or 0 for non-toxic", "gpt-3.5-turbo-1106")
print(f"Accuracy: {sugarai_original_test_accuracy * 100:.2f}%")

Accuracy: 0.80%


In [13]:
# testing accuracy of Sugar AI Toxicity Classification dataset on fine-tuned model
sugarai_fine_tuned_test_accuracy = accuracy_testing(sugarai, evaluation_prompt, "ft:gpt-3.5-turbo-1106:personal::ASv1ofXm")
print(f"Accuracy: {sugarai_fine_tuned_test_accuracy * 100:.2f}%")

Accuracy: 91.88%


In [11]:
# testing accuracy of Unintended Sugar AI Toxicity Classification dataset on fine-tuned model with RAG
sugarai_rag_tuned_test_accuracy = accuracy_testing(sugarai, evaluation_prompt, "ft:gpt-3.5-turbo-1106:personal::ASv1ofXm", True)
print(f"Accuracy: {sugarai_rag_tuned_test_accuracy * 100:.2f}%")

Index: 0
Text: Youre full shitand food
Search Results Length: 25
Two Shots Found!
#1 shot: 1. Youre full shitand food
#2 shot: 2568. Hey ya doin fuck face u think safe huh ya SKANK ho need death nothing else really dislike existance think piece shit come home kill manlover hiding perish skank worthless dog shit backyard think come get GOD witness conduct crusade kind FUCK would change lci food fight article really hope die hole somewhere continue come back fucking ROT HELL longer needed earth wikipedia make like homosexual anal fuck drown cesspool bodily fluids really hope perish soon end life way want starting ideas Hang shoot head shotgun whatever wish hell even light fire hang dont make much mess one want spend much time WORTHLESS PIECE SHIT like go die commit suicide terminate existance would make happy hope GOD end life tomorrow even sunrise get blocked KNOW read thank longer needed thank managementgod
Shots added!
Index: 1
Text: Would accurate call shrimp gritz Looks fantastic
Se

In [14]:
# testing accuracy of Unintended Bias Toxicity Classification dataset on original model
unintended_bias_original_test_accuracy = accuracy_testing(unintended_bias, "Detect whether either is 1 for toxic or 0 for non-toxic", "gpt-3.5-turbo-1106")
print(f"Accuracy: {unintended_bias_original_test_accuracy * 100:.2f}%")

Accuracy: 0.60%


In [15]:
# testing accuracy of Unintended Bias Toxicity Classification dataset on fine-tuned model
unintended_bias_fine_tuned_original_test_accuracy = accuracy_testing(unintended_bias, evaluation_prompt, "ft:gpt-3.5-turbo-1106:personal::ASv1ofXm")
print(f"Accuracy: {unintended_bias_fine_tuned_original_test_accuracy * 100:.2f}%")

Accuracy: 78.40%


In [12]:
# testing accuracy of Unintended Bias Toxicity Classification dataset on fine-tuned model with RAG
unintended_bias_rag_tuned_test_accuracy = accuracy_testing(unintended_bias, evaluation_prompt, "ft:gpt-3.5-turbo-1106:personal::ASv1ofXm", True)
print(f"Accuracy: {unintended_bias_rag_tuned_test_accuracy * 100:.2f}%")

Index: 0
Text: idiot holding sign saying hate Blacks going win many friends influence people players take knee National Anthem winning many friends influencing many people time place peaceful protest Disrespecting flag see overwhelming blow back last weeks neither time place Sorry Charlie majority President owners fans way NASCAR crowd agree one DISRESPECT FLAG TAKING KNEE
Search Results Length: 25
Two Shots Found!
#1 shot: 1. idiot holding sign saying hate Blacks going win many friends influence people players take knee National Anthem winning many friends influencing many people time place peaceful protest Disrespecting flag see overwhelming blow back last weeks neither time place Sorry Charlie majority President owners fans way NASCAR crowd agree one DISRESPECT FLAG TAKING KNEE
#2 shot: 24. Youre right mistake called policy shortsighted clearly called ignorant Moving since goals convince readers youre wrong b convince youre wrong women minorities associate agree policies like one es