In [1]:
import json

# Define System Prompt

In [2]:
# System prompt used as the "system" message of every generated sample and of
# the test request sent to the fine-tuned model. It spells out the three
# points tables (diameter, texture, color) and the grade cut-offs.
# NOTE(review): the diameter table has no row for d <= 3 inches.
system_msg = """Below is the AISX apple fruit grading system, which grades the quality of apples based on their diameter, texture, and color. Each apple gets a certain number of points based on the following table:

Points, Diameter (d, inches)
1, 3 < d <= 3.6
2, 3.6 < d <= 4.2
3, d > 4.2

Points, Texture
1, Rough
2, Smooth but dimpled
3, Smooth

Points, Color
1, Yellow
2, Red with some yellow
3, Red

Apples with 8 or more points are Grade A, 6-7 points Grade B, and Grade C below 6 points.

You are an expert in this apple grading system."""

# Generate Synthetic Dataset

In [8]:
import random

# Maps each free-text color description to its points under the color table of
# the grading system: Red = 3, Red with some yellow = 2, Yellow = 1.
# The yellow and mixed groups previously carried each other's scores, which
# contradicted the table. Key order is unchanged.
apple_color_options = {
    # Red -> 3 points
    "red": 3, "all red": 3, "99% red": 3, "crimson": 3,
    "bright red": 3, "dull red": 3, "deep red": 3,
    # Yellow -> 1 point
    "yellow": 1, "bright yellow": 1, "dull yellow": 1,
    "matte yellow": 1, "99% yellow": 1, "all yellow": 1,
    "banana colored": 1,
    # Red with some yellow (any red/yellow mix) -> 2 points
    "red and yellow": 2, "red and a little bit yellow": 2,
    "yellow and red": 2, "yellowish and some red": 2,
    "mostly red but with a hint of yellow": 2,
    "red with small yellow spots": 2, "yellow with red spots": 2,
}

# Maps each free-text texture description to its points under the texture
# table: Rough = 1, Smooth but dimpled = 2, Smooth = 3. Built from one tuple of
# descriptions per score; insertion order follows the groups top to bottom.
apple_tecture_options = {
    description: points
    for points, descriptions in (
        (1, ("rough", "grainy", "very scratched", "very pitted")),
        (2, ("smooth with a few dimples", "mostly smooth",
             "smooth but has a few scratches",
             "smooth other than one small pit")),
        (3, ("smooth", "silky", "not rough at all", "glassy")),
    )
    for description in descriptions
}

def diameter_points(d):
    """Return the diameter points for an apple of diameter ``d`` (inches).

    Brackets: ``3 < d <= 3.6`` -> 1, ``3.6 < d <= 4.2`` -> 2, ``d > 4.2`` -> 3.

    Args:
        d: Apple diameter in inches.

    Returns:
        int: 1, 2 or 3.

    Raises:
        ValueError: If ``d`` is not above 3 inches (or is NaN). The table has
            no bracket for such values; they used to fall into the catch-all
            branch and receive the top score.
    """
    if 3 < d <= 3.6:
        return 1
    if 3.6 < d <= 4.2:
        return 2
    if d > 4.2:
        return 3
    # Every comparison above is False for d <= 3 and for NaN.
    raise ValueError(
        "diameter {!r} is outside the grading table (must be > 3 inches)".format(d)
    )


def grade_apple(points):
    """Translate a total point score into its grade label.

    Args:
        points: Sum of the diameter, color and texture points.

    Returns:
        str: "Grade A" for 8 or more points, "Grade B" for 6 or 7,
        otherwise "Grade C".
    """
    # Cut-offs are checked from the highest down; the first one met wins.
    for minimum, label in ((8, "Grade A"), (6, "Grade B")):
        if points >= minimum:
            return label
    return "Grade C"


def generate_sample():
    """Build one synthetic chat-format example for the apple grading task.

    A diameter, a color description and a texture description are drawn at
    random, scored with ``diameter_points`` and the two option tables, graded
    with ``grade_apple``, and wrapped into a system/user/assistant exchange.

    Returns:
        dict: ``{"messages": [...]}`` with three ``{"role", "content"}``
        entries: the system prompt, the user request and the graded reply.
    """
    # Round to the two decimals shown in the user message *before* scoring, so
    # the label always agrees with the number in the text (an unrounded 3.604
    # prints as "3.60" but would score in the 3.6 < d bracket).
    # Redraw the rare value that rounds to 3.00: the table starts strictly
    # above 3 inches.
    # NOTE(review): this range never reaches the d > 4.2 (3-point) bracket.
    diameter = round(random.uniform(3, 4.2), 2)
    while diameter <= 3:
        diameter = round(random.uniform(3, 4.2), 2)
    color = random.choice(list(apple_color_options))
    texture = random.choice(list(apple_tecture_options))

    total_points = (diameter_points(diameter)
                    + apple_color_options[color]
                    + apple_tecture_options[texture])
    grade = grade_apple(total_points)

    # Every prefix ends with a space so the number never fuses with the text.
    user_msg_start_options = ["I have an apple with a diameter of ",
                              "My apple has a diameter of ",
                              "apple with a diameter of ",
                              "grade my apple that has a width of ",
                              "My friend's apple measures a width of "]
    user_msg = random.choice(user_msg_start_options)
    user_msg += f"{diameter:.2f} inches and is {color} and feels {texture}"

    assistant_msg = f"Thank you for your request. This apple is {grade} according to the AISX apple grading system."

    return {"messages": [{"role": "system", "content": system_msg},
                         {"role": "user", "content": user_msg},
                         {"role": "assistant", "content": assistant_msg}]}

In [9]:
def _write_samples(path, count):
    """Write ``count`` freshly generated samples to ``path``, one JSON object per line."""
    with open(path, 'w') as jfile:
        jfile.writelines(json.dumps(generate_sample()) + '\n' for _ in range(count))


# (output file, number of samples) per split, written in this order.
for split_path, split_size in (("../data/apples_train.jsonl", 700),
                               ("../data/apples_validation.jsonl", 150),
                               ("../data/apples_test.jsonl", 150)):
    _write_samples(split_path, split_size)

# Upload Datasets

In [10]:
from openai import OpenAI
from dotenv import load_dotenv
import os
# Read the project-level .env file, then build the API client from the
# OPENAI_API_KEY environment variable.
load_dotenv('../.env')
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Upload the training and validation splits for fine-tuning. Each file is
# opened in a context manager so its handle is closed once the upload call
# returns, instead of being left open for the life of the kernel.
with open("../data/apples_train.jsonl", "rb") as train_fh:
    training_file = client.files.create(file=train_fh, purpose="fine-tune")
with open("../data/apples_validation.jsonl", "rb") as validation_fh:
    validation_file = client.files.create(file=validation_fh, purpose="fine-tune")

# Create Fine-Tuning Job

In [11]:
# Start the fine-tuning run on the uploaded training file, with the uploaded
# validation file attached.
job = client.fine_tuning.jobs.create(
    model="gpt-4o-mini-2024-07-18",
    training_file=training_file.id,
    validation_file=validation_file.id,
)

In [18]:
import time
# Poll until the job reaches a terminal state. "failed" and "cancelled" are
# final too; waiting for "succeeded" alone would loop forever on them.
done = False
while not done:
    status = client.fine_tuning.jobs.retrieve(job.id).status
    print(f"Job status = {status}")
    if status in ("succeeded", "failed", "cancelled"):
        done = True
    else:
        time.sleep(3)  # not finished yet: wait before asking again

# Stop here on an unsuccessful job rather than carrying on to the cells that
# use the fine-tuned model.
if status != "succeeded":
    raise RuntimeError(f"Fine-tuning job {job.id} ended with status {status!r}")

Job status = succeeded


In [20]:
# Fetch the job record again and read the name of the model it produced.
finished_job = client.fine_tuning.jobs.retrieve(job.id)
finetuned_model = finished_job.fine_tuned_model

In [21]:
# Try the fine-tuned model on one hand-written request. Under the tables in the
# system prompt this apple scores 2 (3.7 in) + 3 (red) + 3 (smooth) = 8 points,
# i.e. Grade A.
test_messages = [
    {"role": "system", "content": system_msg},
    {"role": "user", "content": "Grade my apple with a diameter of 3.7 inches and red in color with a smooth texture"},
]
completion = client.chat.completions.create(model=finetuned_model,
                                            messages=test_messages)
print(completion.choices[0].message)

ChatCompletionMessage(content='Thank you for your request. This apple is Grade A according to the AISX apple grading system.', refusal=None, role='assistant', function_call=None, tool_calls=None)
