# Fine-Tuning GPT-4o with the OpenAI API

In [1]:
#Install necessary libraries
#%pip install openai pandas jsonlines --quiet
#pip uninstall openai
#%pip install openai==0.28  #NOTE: 0.28 predates the `OpenAI` client class imported below; this notebook needs openai>=1.0
#pip install git+https://github.com/openai/openai-python


^C
Note: you may need to restart the kernel to use updated packages.


## Import Libraries and Set Up API Key

In [2]:
import json
import jsonlines
import openai
import os
from openai import OpenAI

#Read the API key from the OPENAI_API_KEY environment variable so the secret is never stored in the notebook
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    #Fail here with a clear message instead of at the first API call
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this notebook.")

client = OpenAI(api_key=api_key)



## Load and Validate Fine-Tuning Data

In [3]:
#Path of the chat-format jsonl dataset that is validated below and used for conversational fine-tuning
file_path = 'laptop_chat_finetuning_new.jsonl'

#Function to validate the structure of the jsonl file used for fine-tuning: each entry must have a 'messages' key, every message inside it must have 'role' and 'content' keys, and the role must be system, user or assistant.
def validate_messages_jsonl(file_path):
    """Validate a chat-format jsonl file before it is used for fine-tuning.

    Every entry must be a JSON object with a non-empty 'messages' list, and every
    message must be a JSON object with 'role' and 'content' keys whose role is
    'system', 'user' or 'assistant'. Prints a success message when all entries pass.

    Args:
        file_path: Path of the jsonl file to check.

    Raises:
        ValueError: On the first entry or message that breaks the structure; the
            message names the 1-based line (and message) number.
    """
    allowed_roles = ('system', 'user', 'assistant')
    with jsonlines.open(file_path) as reader:
        for i, obj in enumerate(reader):
            #A line holding a JSON list/string/number would otherwise reach the key test below as a membership or substring check
            if not isinstance(obj, dict):
                raise ValueError(f"Error in line {i + 1}: Entry must be a JSON object.")
            if 'messages' not in obj:
                raise ValueError(f"Error in line {i + 1}: Missing 'messages' key.")
            messages = obj['messages']
            #An empty list would skip the per-message loop and be reported as valid
            if not isinstance(messages, list) or not messages:
                raise ValueError(f"Error in line {i + 1}: 'messages' must be a non-empty list.")
            for j, message in enumerate(messages):
                #Guard against strings etc., where `'role' not in message` would be a substring test
                if not isinstance(message, dict):
                    raise ValueError(f"Error in line {i + 1}, message {j + 1}: Message must be a JSON object.")
                if 'role' not in message or 'content' not in message:
                    raise ValueError(f"Error in line {i + 1}, message {j + 1}: Missing 'role' or 'content' key.")
                if message['role'] not in allowed_roles:
                    raise ValueError(f"Error in line {i + 1}, message {j + 1}: Invalid role '{message['role']}'.")
    print("Data validated successfully for conversational fine-tuning.")

#Validate the dataset; raises ValueError on the first malformed entry, otherwise prints a success message
validate_messages_jsonl(file_path)

Data validated successfully for conversational fine-tuning.


## Upload File to OpenAI for Fine-Tuning

In [5]:
#Read every record of the jsonl dataset into an in-memory list
with jsonlines.open(file_path) as reader:
    data = [record for record in reader]

#Keep the first 80% of the records for training and hold out the remaining 20% for validation
train_size = int(0.8 * len(data))
train_data, validation_data = data[:train_size], data[train_size:]

#Write each split to its own jsonl file
train_file_path = 'laptop_chat_train.jsonl'
validation_file_path = 'laptop_chat_validation.jsonl'
for split_path, split_rows in (
    (train_file_path, train_data),
    (validation_file_path, validation_data),
):
    with jsonlines.open(split_path, mode='w') as writer:
        writer.write_all(split_rows)

#Upload training and validation file to OpenAI for fine-tuning job
#Each file is opened in a with-block so its handle is closed as soon as the upload call returns
with open(train_file_path, "rb") as train_file:
    train_response = client.files.create(
        file=train_file,
        purpose="fine-tune"
    )
with open(validation_file_path, "rb") as validation_file:
    validation_response = client.files.create(
        file=validation_file,
        purpose="fine-tune"
    )

#Access the ID attribute directly from the response object to track the uploaded datasets
train_file_id = train_response.id
validation_file_id = validation_response.id

print(f"Training File ID: {train_file_id}")
print(f"Validation File ID: {validation_file_id}")

Training File ID: file-ANcpYYdVMRzMfXcquEj3hJ
Validation File ID: file-96TmZqMy5caaH8n41jJ35s


In [15]:
#Create the file on OpenAI for fine-tuning job
#The dataset is opened in a with-block so its handle is closed once the upload call returns
with open("laptop_chat_finetuning_reduced.jsonl", "rb") as upload_file:
    response = client.files.create(
        file=upload_file,
        purpose="fine-tune"
    )

#Convert the FileObject to a dictionary
response_dict = {
    "id": response.id, #Unique ID assigned by OpenAI
    "bytes": response.bytes, #File size
    "created_at": response.created_at, #Time of the uploaded file
    "filename": response.filename, #Name of the file
    "object": response.object, #Type of object
    "purpose": response.purpose, #Purpose of file
    "status": response.status, #Status of the file to track the fine-tuning
    "status_details": response.status_details, #Additional status details
}

#Save the response to a JSON file
with open("response.json", "w") as json_file:
    json.dump(response_dict, json_file, indent=4)

#Extract the file ID
file_id = response_dict['id']
print(f"File ID: {file_id}")

## Start Fine-Tuning Job

In [16]:
#Launch the fine-tuning job on the uploaded training file, with the validation file attached
fine_tune_response = client.fine_tuning.jobs.create(
    model="gpt-4o-2024-08-06",
    training_file=train_file_id,
    validation_file=validation_file_id,
)

#Short handles for the nested parts of the job response
job_hyperparameters = fine_tune_response.hyperparameters
job_error = fine_tune_response.error

#Copy the fields of interest into a plain dictionary so they can be written as JSON
fine_tune_dict = {
    field: getattr(fine_tune_response, field)
    for field in (
        "id",
        "created_at",
        "status",
        "model",
        "training_file",
        "validation_file",
        "fine_tuned_model",
    )
}
fine_tune_dict["hyperparameters"] = {
    name: getattr(job_hyperparameters, name)
    for name in ("n_epochs", "batch_size", "learning_rate_multiplier")
}
#When the job reports no error, every error field is stored as None
fine_tune_dict["error"] = {
    name: (getattr(job_error, name) if job_error else None)
    for name in ("code", "message", "param")
}
fine_tune_dict["result_files"] = fine_tune_response.result_files
#NOTE: this entry stores the job's status, i.e. the same value as the "status" entry above
fine_tune_dict["status_details"] = fine_tune_response.status
fine_tune_dict["estimated_finish"] = fine_tune_response.estimated_finish

#Persist the job details next to the notebook
with open("fine_tune_response.json", "w") as json_file:
    json.dump(fine_tune_dict, json_file, indent=4)

print(f"Fine-tuning job started with ID: {fine_tune_response.id}")

Fine-tuning job started with ID: ftjob-1EnAOTMHgaW05WUchqTh76Sf


In [None]:
# #Create the fine-tuning job
# fine_tune_response = client.fine_tuning.jobs.create(
#     training_file=file_id,
#     model="gpt-4o-2024-08-06"  #Use GPT-4o version that's suitable for fine-tuning
# )

# #Convert the FineTuningJob object into a dictionary
# fine_tune_dict = {
#     "id": fine_tune_response.id,
#     "created_at": fine_tune_response.created_at,
#     "status": fine_tune_response.status,
#     "model": fine_tune_response.model,
#     "training_file": fine_tune_response.training_file,
#     "validation_file": fine_tune_response.validation_file,
#     "fine_tuned_model": fine_tune_response.fine_tuned_model,
#     "hyperparameters": {
#         "n_epochs": fine_tune_response.hyperparameters.n_epochs,
#         "batch_size": fine_tune_response.hyperparameters.batch_size,
#         "learning_rate_multiplier": fine_tune_response.hyperparameters.learning_rate_multiplier
#     },
#     "error": {
#         "code": fine_tune_response.error.code,
#         "message": fine_tune_response.error.message,
#         "param": fine_tune_response.error.param
#     },
#     "result_files": fine_tune_response.result_files,
#     "status_details": fine_tune_response.status,
#     "estimated_finish": fine_tune_response.estimated_finish,
# }

# #Save the fine-tuning job response to a JSON file
# with open("fine_tune_response.json", "w") as json_file:
#     json.dump(fine_tune_dict, json_file, indent=4)

# #Extract the fine-tuning job ID
# fine_tune_id = fine_tune_response.id
# print(f"Fine-tuning job started with ID: {fine_tune_id}")

Fine-tuning job started with ID: ftjob-4wNiRYkDaSJzF42sOnHUoUZZ


The progress and results should be available on the OpenAI fine-tuning dashboard when you are logged in to the same account that owns the API key used above.