In [None]:
# Test fine-tuning OpenAI to a specific speech pattern

from dotenv import load_dotenv
from openai import OpenAI

In [None]:
# Load variables from a .env file into the process environment.
# NOTE(review): override=True presumably lets values from .env replace variables
# that are already set in the environment — confirm against the python-dotenv docs.
load_dotenv(override=True)
# Client object used by every later cell. No credentials are passed explicitly,
# so presumably they are picked up from the environment loaded above — confirm.
openai = OpenAI()

In [None]:
# Split the dataset into train, validation, and test sets

# Share of the records assigned to training and validation; whatever is left
# after those two slices becomes the test set.
TRAIN_FRACTION = 0.70
VALIDATE_FRACTION = 0.15

# Keep only non-blank lines: an empty line (for example a trailing blank line at
# the end of the file) is not a record, so it must not be counted or copied into
# the split files.
with open('glorp-speech.jsonl', 'r', encoding='utf-8') as f:
    lines = [line for line in f if line.strip()]

total_records = len(lines)
print(f"Total records: {total_records}")

# int() truncates, so any rounding remainder ends up in the test slice.
train_end = int(total_records * TRAIN_FRACTION)
validate_end = train_end + int(total_records * VALIDATE_FRACTION)

# NOTE: the split is sequential (no shuffling), so it relies on the records in
# the source file not being ordered in a way that would bias the splits.
train_data = lines[:train_end]
validate_data = lines[train_end:validate_end]
test_data = lines[validate_end:]

print(f"Train: {len(train_data)} records")
print(f"Validate: {len(validate_data)} records")
print(f"Test: {len(test_data)} records")

with open('train.jsonl', 'w', encoding='utf-8') as f:
    f.writelines(train_data)

with open('validate.jsonl', 'w', encoding='utf-8') as f:
    f.writelines(validate_data)

# The test split is currently kept in memory only; uncomment to persist it.
# with open('test.jsonl', 'w', encoding='utf-8') as f:
#     f.writelines(test_data)

print("Files created")

In [None]:
# Upload the training split; the file is opened in binary mode for the upload.
with open("train.jsonl", "rb") as train_handle:
    train_file = openai.files.create(purpose="fine-tune", file=train_handle)

# Upload the validation split the same way.
with open("validate.jsonl", "rb") as validate_handle:
    validation_file = openai.files.create(purpose="fine-tune", file=validate_handle)

# Start the fine-tuning job on the two uploaded files. The call is the cell's
# last expression, so the returned job object is shown as the cell output.
openai.fine_tuning.jobs.create(
    model="gpt-4.1-nano-2025-04-14",
    suffix="glorp-speech-test-2",
    training_file=train_file.id,
    validation_file=validation_file.id,
    hyperparameters={"n_epochs": 5, "batch_size": 2},
    seed=42,
)

Uploaded files can be viewed in the OpenAI dashboard: [https://platform.openai.com/storage/files/](https://platform.openai.com/storage/files/)

In [None]:
# Request at most one fine-tuning job (limit=1); as the cell's last expression
# the response is shown as the cell output.
openai.fine_tuning.jobs.list(limit=1)

In [None]:
# Take the id of the first job in a one-item listing.
# NOTE(review): this assumes the first job returned is the one this notebook
# created (i.e. the listing is newest-first and no other job was started in the
# meantime) — confirm. Indexing data[0] will fail if the listing is empty.
job_id = openai.fine_tuning.jobs.list(limit=1).data[0].id

In [None]:
# Display the job id that the following cells operate on.
job_id

In [None]:
# Retrieve the job identified by job_id; the returned object is shown as the
# cell output.
openai.fine_tuning.jobs.retrieve(job_id)

In [None]:
# Request up to 10 events (limit=10) for the job and display the `.data`
# payload of the response.
openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10).data

In [None]:
# Read the name of the model produced by the fine-tuning job.
# The job object only carries a model name once the job has finished, so fail
# here with a clear message instead of passing None on to the chat call.
fine_tuning_job = openai.fine_tuning.jobs.retrieve(job_id)
fine_tuned_model_name = fine_tuning_job.fine_tuned_model
if fine_tuned_model_name is None:
    raise RuntimeError(
        f"Fine-tuning job {job_id} has no fine-tuned model yet "
        f"(status: {fine_tuning_job.status}); re-run this cell once the job has succeeded."
    )

In [None]:
# Display the model name read from the fine-tuning job.
fine_tuned_model_name

In [None]:
# Send a single user prompt to the fine-tuned model.
prompt_messages = [
    {"role": "user", "content": "How do I tie a tie?"},
]
response = openai.chat.completions.create(
    messages=prompt_messages,
    max_tokens=200,
    model=fine_tuned_model_name,
)
# Show the text of the first returned choice as the cell output.
response.choices[0].message.content