In [1]:
import os
import openai
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

# This notebook sets up and launches an OpenAI fine-tuning job.
# The same steps apply to any fine-tunable GPT model; for our purposes
# we focused primarily on GPT-4o.

In [8]:
# Load environment variables via python-dotenv, then read the API key used
# for the fine-tuned GPT-4o model. The value is None if the variable is unset.
load_dotenv()
FINE_TUNED_TOXIC_DETECTION_API_KEY = os.environ.get("FINE_TUNED_TOXIC_DETECTION_API_KEY")

In [9]:
# Build the OpenAI client that every later cell uses, authenticated with the
# key read from the environment.
client = OpenAI(api_key=FINE_TUNED_TOXIC_DETECTION_API_KEY)

In [17]:
# Upload the training and validation JSONL files to OpenAI with the
# "fine-tune" purpose so they can be referenced by a fine-tuning job.
# Each file is opened in a `with` block so its handle is closed as soon as
# the upload call returns.
with open("../data/training_kaggle_toxic_classification_dataset.jsonl", "rb") as training_jsonl:
    training_upload = client.files.create(
        file=training_jsonl,
        purpose="fine-tune",
    )

with open("../data/validation_kaggle_toxic_classification_dataset.jsonl", "rb") as validation_jsonl:
    validation_upload = client.files.create(
        file=validation_jsonl,
        purpose="fine-tune",
    )

# The last expression is shown as the cell output. `training_upload.id` and
# `validation_upload.id` hold the file IDs returned by the API.
validation_upload

FileObject(id='file-UFWHTku5sU3K6GzVemhsZt0H', bytes=3847655, created_at=1727986652, filename='validation_kaggle_toxic_classification_dataset.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [11]:
# List the files stored for this API key; each entry in the output includes
# its processing status.
uploaded_files = client.files.list()
uploaded_files

SyncPage[FileObject](data=[FileObject(id='file-FpwsaDITa0co3Z4sNvzknGGX', bytes=7686481, created_at=1727979219, filename='kaggle_toxic_classification_dataset.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None), FileObject(id='file-gYnCm7EvYy3qnTJhpMJ5sNs7', bytes=78420036, created_at=1727915595, filename='kaggle_toxic_classification_dataset.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None), FileObject(id='file-sw9JIkTReL2JanwwHKAhNuiG', bytes=48420688, created_at=1727913270, filename='training.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)], object='list', has_more=False, first_id='file-FpwsaDITa0co3Z4sNvzknGGX', last_id='file-sw9JIkTReL2JanwwHKAhNuiG')

In [18]:
# Submit a fine-tuning job for the GPT-4o base snapshot, pointing it at the
# training and validation files by their OpenAI file IDs.
# NOTE(review): both IDs are hard-coded. The validation ID matches the upload
# output shown in this notebook; the training ID is not visible in any output
# here -- confirm it refers to the intended training upload.
training_file_id = "file-zX0Hx54u69Y73wTuxyDfTuLN"
validation_file_id = "file-UFWHTku5sU3K6GzVemhsZt0H"

fine_tuning_job = client.fine_tuning.jobs.create(
    model="gpt-4o-2024-08-06",
    training_file=training_file_id,
    validation_file=validation_file_id,
)
fine_tuning_job

FineTuningJob(id='ftjob-JAIhvjA1yAen4Yt6Utyh3289', created_at=1727986789, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-2024-08-06', object='fine_tuning.job', organization_id='org-S652ryxRoR0nJQpnMy7g1xcm', result_files=[], seed=1154032060, status='validating_files', trained_tokens=None, training_file='file-zX0Hx54u69Y73wTuxyDfTuLN', validation_file='file-UFWHTku5sU3K6GzVemhsZt0H', estimated_finish=None, integrations=[], user_provided_suffix=None)

In [15]:
# Fetch the current state of a fine-tuning job by its ID.
# NOTE(review): the ID is hard-coded and differs from the job ID shown in the
# job-creation output of this notebook -- confirm which job should be inspected.
job_id = "ftjob-Mymd6RM7VvSJwOVBBpqNwx5p"
client.fine_tuning.jobs.retrieve(job_id)

FineTuningJob(id='ftjob-Mymd6RM7VvSJwOVBBpqNwx5p', created_at=1727979309, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-2024-08-06:personal::AELSRgXJ', finished_at=1727982438, hyperparameters=Hyperparameters(n_epochs=1, batch_size=10, learning_rate_multiplier=2), model='gpt-4o-2024-08-06', object='fine_tuning.job', organization_id='org-S652ryxRoR0nJQpnMy7g1xcm', result_files=['file-5gCTWySFk0MgmOZ2YjQvyEZJ'], seed=69129836, status='succeeded', trained_tokens=1277205, training_file='file-FpwsaDITa0co3Z4sNvzknGGX', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)

In [12]:
# Load the toxicity-classification instructions from disk, decoding the file
# explicitly as UTF-8. The text is later sent as the system message.
with open("../data/text/detailed_instructions.txt", "r", encoding="utf-8") as prompt_file:
    evaluation_prompt = prompt_file.read()

In [13]:
# Show the loaded prompt as the cell output (string repr, so line breaks
# appear as \n) to check what will be sent to the model.
evaluation_prompt

'Task: You are given a set of comments from social media. Your goal is to evaluate each comment and classify it as either toxic or non-toxic. A comment is considered toxic if it contains harmful, abusive, offensive, or harassing language. If a comment is toxic, label it with a 1. Otherwise, if it is neutral or polite and does not contain harmful language, label it with a 0 for non-toxic.\n\nDefinition of Toxicity:\nToxic comments (1): These are comments that include language or content that is rude, disrespectful, hateful, harmful, threatening, or harassing. This includes but is not limited to:\n\nProfanity or slurs (racial, gender-based, homophobic, etc.)\nDirect or implied threats of violence or harm\nPersonal attacks, name-calling, or derogatory statements\nEncouragement of harm or violence toward individuals or groups\nLanguage that promotes discrimination, hatred, or exclusion\nInsults or offensive jokes directed at individuals or groups based on their identity, beliefs, or charac

In [16]:
# Ask the fine-tuned model to classify a single comment. The system message
# carries the full labelling instructions; the user message is the comment
# to be labelled.
# NOTE(review): the model ID is hard-coded and differs from the
# `fine_tuned_model` shown in the job-status output of this notebook --
# confirm this is the intended model.
system_message = {"role": "system", "content": evaluation_prompt}
user_message = {"role": "user", "content": "Woah, that was uncalled for"}

completion = client.chat.completions.create(
    model="ft:gpt-4o-2024-08-06:personal::AENT6Q3Y",
    messages=[system_message, user_message],
)

In [17]:
# Print the text of the first returned choice, i.e. the model's label for the
# comment.
first_choice = completion.choices[0]
print(first_choice.message.content)

0
