Code from OpenAI Cookbook
https://cookbook.openai.com/examples/chat_finetuning_data_prep

In [2]:
import numpy as np
import pandas as pd
import os
import itertools
import json
import random
import openai
import time

import helper_functions as fu
import cookbook_function as cbf

In [3]:
os.environ['TestKey3'] = 'sk-proj-GL73kbRwhRpgN3EmXz1YT3BlbkFJEMJhTsinxQDel42BZdNz' 
client = openai.OpenAI(api_key=os.environ['TestKey3'])
headers = {
    "Authorization": f"Bearer {os.environ['TestKey3']}"
}

In [4]:
# read in ft messages for generation task
data_paths = ["data/finetuning_data/train_gen.jsonl"]

# Load the dataset
datasets=[]
for data_path in data_paths:
    with open(data_path, 'r', encoding='utf-8') as f:
        dataset = [json.loads(line) for line in f]
        print(type(dataset))
    datasets.append(dataset)
    # Initial dataset stats
    print("{} :".format(data_path))
    print("Num examples:", len(dataset))
    print("First example:")
    for message in dataset[0]["messages"]:
        print(message)
    

<class 'list'>
data/finetuning_data/train_gen.jsonl :
Num examples: 13
First example:
{'role': 'system', 'content': "Imagine you are an experienced policymaker in the European Parliament. When provided with a legislative proposal supported by either a left- or right-leaning, a general or no majority, your task is to modify the text to potentially gain support from counterfactual majority. Return the revised full text of the proposal with changes highlighted. If the opposing majority would fundamentally reject the proposal based on its topic or core principles, respond with: 'A [left/right] majority would reject the proposal.'"}
{'role': 'user', 'content': 'Supporting majority: None, alter text to: Right majority. Proposal: Listing the third countries whose nationals must be in possession of visas when crossing the external borders and those whose nationals are exempt from that requirement (Kosovo). The proposed Regulation aims to transfer the reference to ‘Kosovo’ from Annex I (list of

In [5]:
# check for format errors
for dataset in data_paths:
    cbf.check_format(dataset)

Found errors:
data_type: 36


In [6]:
cbf.validate_jsonl('data/finetuning_data/train_gen.jsonl')

Valid jsonl file


In [7]:
# check for missing data, distribution of messages in each conversation,
# distribution of tokens per conversation, print token limit warnings

# Warnings and tokens counts
n_missing_system = 0
n_missing_user = 0
n_messages = []
convo_lens = []
assistant_message_lens = []

for dataset in datasets:
    for ex in dataset:
        messages = ex["messages"]
        if not any(message["role"] == "system" for message in messages):
            n_missing_system += 1
        if not any(message["role"] == "user" for message in messages):
            n_missing_user += 1
        n_messages.append(len(messages))
        convo_lens.append(cbf.num_tokens_from_messages(messages))
        assistant_message_lens.append(cbf.num_assistant_tokens_from_messages(messages))
        
    print("Num examples missing system message:", n_missing_system)
    print("Num examples missing user message:", n_missing_user)
    cbf.print_distribution(n_messages, "num_messages_per_example")
    #print_distribution(convo_lens, "num_total_tokens_per_example")
    cbf.print_distribution(assistant_message_lens, "num_assistant_tokens_per_example")
    n_too_long = sum(l > 4096 for l in convo_lens)
    print(f"\n{n_too_long} examples may be over the 4096 token limit, they will be truncated during fine-tuning")

Num examples missing system message: 0
Num examples missing user message: 0

#### Distribution of num_messages_per_example:
min / max: 3, 3
mean / median: 3.0, 3.0
p5 / p95: 3.0, 3.0

#### Distribution of num_assistant_tokens_per_example:
min / max: 7, 415
mean / median: 171.53846153846155, 160.0
p5 / p95: 28.200000000000017, 358.60000000000014

0 examples may be over the 4096 token limit, they will be truncated during fine-tuning


In [8]:
# Pricing and default n_epochs estimate
MAX_TOKENS_PER_EXAMPLE = 4096

TARGET_EPOCHS = 3
MIN_TARGET_EXAMPLES = 100
MAX_TARGET_EXAMPLES = 25000
MIN_DEFAULT_EPOCHS = 1
MAX_DEFAULT_EPOCHS = 25

n_epochs = TARGET_EPOCHS
n_train_examples = len(datasets[0]) # only training data
if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:
    n_epochs = min(MAX_DEFAULT_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)
elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:
    n_epochs = max(MIN_DEFAULT_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)

n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)
print(f"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training")
print(f"By default, you'll train for {n_epochs} epochs on this dataset")
print(f"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens")

Dataset has ~17888 tokens that will be charged for during training
By default, you'll train for 7 epochs on this dataset
By default, you'll be charged for ~125216 tokens


Upload validated files to OpenAI API

In [9]:
# upload validated data file to OpenAI API

train_upload = client.files.create(
  file=open("data/finetuning_data/train_gen.jsonl", "rb"),
  purpose="fine-tune"
  )
print("Uploaded training file id:", train_upload.id)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/files "HTTP/1.1 200 OK"


Uploaded training file id: file-kBl1kDpPtygbfzK9179rVbt0


Create Fine-Tuning Job via OpenAI Software Development Kit

In [10]:
test1 = client.fine_tuning.jobs.retrieve("ftjob-BnUPm66GFpZ6JgBlboaJX7dp")
print(test1)
print(test1.hyperparameters.n_epochs)

INFO:httpx:HTTP Request: GET https://api.openai.com/v1/fine_tuning/jobs/ftjob-BnUPm66GFpZ6JgBlboaJX7dp "HTTP/1.1 200 OK"


FineTuningJob(id='ftjob-BnUPm66GFpZ6JgBlboaJX7dp', created_at=1722085202, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-3.5-turbo-0125:lse:mig-gen:9pbPAZan', finished_at=1722085538, hyperparameters=Hyperparameters(n_epochs=5, batch_size=1, learning_rate_multiplier=2.0), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-URxBkHYInUDxHdJfjeVT2W58', result_files=['file-eXLrVS0234yUcoBMtQiqSqXD'], seed=124, status='succeeded', trained_tokens=89310, training_file='file-hzYyqN63Wsz9Zt4mACOX1FZ0', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix='mig_gen')
5


In [11]:
# Process jobs
all_job_ids = cbf.process_jobs([2, 5, 10], [5, 7, 10], [1, 2], train_upload, 2)
print(all_job_ids)

18 combinations in total
[(2, 5, 1), (2, 5, 2), (2, 7, 1), (2, 7, 2), (2, 10, 1), (2, 10, 2), (5, 5, 1), (5, 5, 2), (5, 7, 1), (5, 7, 2), (5, 10, 1), (5, 10, 2), (10, 5, 1), (10, 5, 2), (10, 7, 1), (10, 7, 2), (10, 10, 1), (10, 10, 2)]
Processing hyperparameters (lr=2, epoch=5, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-HCLyCTqZ7mZjwgEkEnXUtZpo
Processing hyperparameters (lr=2, epoch=5, batch=2)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.795325 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.614474 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.06 seconds...
Processing hyperparameters (lr=2, epoch=5, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.849818 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.713820 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.79 seconds...
Processing hyperparameters (lr=2, epoch=5, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.872751 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.849779 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-xdXogKpinn4WiE6iTUZhWMhV
Processing hyperparameters (lr=2, epoch=7, batch=1)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.831668 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.622463 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.92 seconds...
Processing hyperparameters (lr=2, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.983177 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.743003 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.93 seconds...
Processing hyperparameters (lr=2, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.986262 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.681078 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 4.81 seconds...
Processing hyperparameters (lr=2, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.903644 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.868611 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 8.29 seconds...
Processing hyperparameters (lr=2, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.878240 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.516926 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 16.56 seconds...
Processing hyperparameters (lr=2, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.889412 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.764873 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.13 seconds...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Processing hyperparameters (lr=2, epoch=7, batch=2)


INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.949075 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.807965 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.29 seconds...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Processing hyperparameters (lr=2, epoch=7, batch=2)


INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.996266 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.685970 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 4.95 seconds...
Processing hyperparameters (lr=2, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.955661 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.512815 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 8.58 seconds...
Processing hyperparameters (lr=2, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.809574 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.703940 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 16.33 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.873943 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.951596 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.49 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.812983 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.524491 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.70 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.844718 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.638841 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 4.14 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.815840 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.748543 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 8.83 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-O3ut4OV4mPICkW7Qo6ZMVbGr
Processing hyperparameters (lr=2, epoch=10, batch=2)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.819653 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.925444 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.82 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.786939 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.705624 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.15 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.801205 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.515753 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 4.79 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.974257 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.925255 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 8.24 seconds...
Processing hyperparameters (lr=2, epoch=10, batch=2)


INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.902286 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-ON0wLwTjI2SjVZQDignioYzF
Processing hyperparameters (lr=5, epoch=5, batch=1)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-3JKYeNtI51VsKsFkkiLR7GA4
Processing hyperparameters (lr=5, epoch=5, batch=2)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-nyUJ4ecGqsEbu4M6d5EyTaxE
Processing hyperparameters (lr=5, epoch=7, batch=1)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.872933 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.930714 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.71 seconds...
Processing hyperparameters (lr=5, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.971169 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.805145 seconds


An error occurred: Connection error.
Processing hyperparameters (lr=5, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-NY5KTAow7jK2cg5D0If3rAGm
Processing hyperparameters (lr=5, epoch=10, batch=1)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-bzOfgwk5V3K8irNhnufIKpAF
Processing hyperparameters (lr=5, epoch=10, batch=2)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-aGlS8HfT7wmQmv2OXQVC5dvU
Processing hyperparameters (lr=10, epoch=5, batch=1)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-rfQsJ2Grl2fvbouTYTeKELTM
Processing hyperparameters (lr=10, epoch=5, batch=2)
Rate limiting: Sleeping for 30.00 seconds


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.794952 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.629994 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.70 seconds...
Processing hyperparameters (lr=10, epoch=5, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.870958 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.510356 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.21 seconds...
Processing hyperparameters (lr=10, epoch=5, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.836638 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.987924 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 4.47 seconds...
Processing hyperparameters (lr=10, epoch=5, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.861121 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.722692 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 8.25 seconds...
Processing hyperparameters (lr=10, epoch=5, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.947620 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.745004 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 16.64 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.830541 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.640347 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.34 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.889163 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.754498 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.47 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.856528 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.929717 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 4.75 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.893899 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.969302 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 8.14 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.849192 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.929166 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 16.53 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.985378 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.937072 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.90 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.978142 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.605556 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.12 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.771459 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.930214 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 4.59 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.829855 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.909898 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 8.57 seconds...
Processing hyperparameters (lr=10, epoch=7, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.775841 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.573490 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 16.96 seconds...
Processing hyperparameters (lr=10, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.981129 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.910365 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 1.03 seconds...
Processing hyperparameters (lr=10, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.984704 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.885214 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"


Rate limit exceeded. Retrying in 2.78 seconds...
Processing hyperparameters (lr=10, epoch=10, batch=1)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 0.922980 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /fine_tuning/jobs in 1.992440 seconds


An error occurred: Request timed out.
Processing hyperparameters (lr=10, epoch=10, batch=2)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/fine_tuning/jobs "HTTP/1.1 200 OK"


Job created with ID ftjob-ybidDEBWMktMLknd9MZAJ4kn
All jobs processed.
FineTuningJob(id='ftjob-ybidDEBWMktMLknd9MZAJ4kn', created_at=1722163006, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=10, batch_size=2, learning_rate_multiplier=10.0), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-URxBkHYInUDxHdJfjeVT2W58', result_files=[], seed=124, status='validating_files', trained_tokens=None, training_file='file-kBl1kDpPtygbfzK9179rVbt0', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix='mig_gen')


Getting Metrics / Evaluation

In [1]:
# Extract information about the jobs
all_results = cbf.extract_job_info(all_job_ids)

NameError: name 'extract_job_info' is not defined

In [14]:
# go through column "result_file_name" of all_results_df and get the content of the result files
# als neue Spalte in all_results_df einfügen
# get the metrics and checkpoints data for each job

for i, (job_id, result_file_name) in enumerate(zip(all_results.job_id, all_results.result_file_name)):
    metrics_df = pd.read_csv(cbf.get_ft_results(result_file_name))
    # add metrics_df to an overall df
    if i == 0:
        all_metrics_df = metrics_df
    else:
        all_metrics_df = pd.concat([all_metrics_df, metrics_df], axis=0)
    checkpoints_df = cbf.get_checkpoint_results(job_id)
    # add checkpoints_df to a overall df
    if i == 0:
        all_checkpoints_df = checkpoints_df
    else:
        all_checkpoints_df = pd.concat([all_checkpoints_df, checkpoints_df], axis=0)

display(all_metrics_df.head())
display(all_checkpoints_df.head())

TypeError: string indices must be integers, not 'str'