# Step 1 - Load the data for fine-tuning
Note: this assumes the files have already passed the basic checks outlined earlier.

In [16]:
# Keys and environment variables

import os
import requests
import json
import time
import openai
from openai import AzureOpenAI

# Connection settings for the Azure OpenAI resource used for fine-tuning.
# The endpoint and key are read from the environment so that no secret is
# stored in the notebook; os.getenv yields None when a variable is not set.
api_base = os.getenv("AOAI_FT_ENDPOINT")
api_key = os.getenv("AOAI_FT_KEY")
api_version = '2023-09-15-preview'  # this version or later is required for fine-tuning
api_type = 'azure'

# Note: the pre-v1 style of configuring the SDK through module-level
# attributes (openai.api_key, openai.api_base, ...) is not used here;
# everything goes through an explicit client object instead.
client = AzureOpenAI(
    azure_endpoint=api_base,
    api_version=api_version,
    api_key=api_key,
)


In [None]:
# Sanity-check that the configuration was picked up from the environment.
# The key itself is deliberately NOT printed: cell outputs are saved with the
# notebook, so echoing the secret would leak it to anyone the file is shared with.
print("Endpoint:", api_base)
print("Key:", "set" if api_key else "NOT SET")


In [18]:
# Upload the training data so it can be referenced by a fine-tuning job.
training_file_name = 'data/dog_emoji_FT_train.jsonl'

# Uses the v1 (client-object) interface of the openai SDK, which is not
# compatible with the pre-v1 module-level calls.
# The file is opened in a `with` block so the handle is closed as soon as the
# upload call returns, even if the call raises.
with open(training_file_name, "rb") as training_data:
    file = client.files.create(
        file=training_data,
        purpose="fine-tune"
    )

print("Response:\n", file)

# `file.id` is the identifier later cells pass to the fine-tuning job.
print("Training file ID:", file.id)
print("Training file name:", file.filename)



Response:
 FileObject(id='file-ca4c57d7ad814211a2db49e0382c5a77', bytes=73874, created_at=1700780969, filename='dog_emoji_FT_train.jsonl', object='file', purpose='fine-tune', status='pending', status_details=None, updated_at=1700780969)
Training file ID: file-ca4c57d7ad814211a2db49e0382c5a77
Training file name: dog_emoji_FT_train.jsonl


In [24]:
# NOTE(review): `OpenAI` is imported here but not referenced in this cell; the
# calls below go through the `client` created earlier. Confirm whether a later
# cell needs it, and if so move it to the import cell at the top.
from openai import OpenAI

# Ask the service for the models available to this resource and print each
# model's ID on its own line.
models = client.models.list()
for model in models:
    print(model.id)

ada
babbage
babbage-002
curie
dall-e-3-3.0
davinci
davinci-002
gpt-35-turbo-0301
gpt-35-turbo-0613
gpt-35-turbo-1106
gpt-4-1106-Preview
gpt-35-turbo-instruct-0914
gpt-35-turbo-16k-0613
gpt-4-0314
gpt-4-0613
gpt-4-32k-0314
gpt-4-32k-0613
text-davinci-003
text-similarity-ada-001
text-similarity-babbage-001
text-similarity-curie-001
text-similarity-davinci-001
text-search-ada-doc-001
text-search-ada-query-001
text-search-babbage-doc-001
text-search-babbage-query-001
text-search-curie-doc-001
text-search-curie-query-001
text-search-davinci-doc-001
text-search-davinci-query-001
code-search-ada-code-001
code-search-ada-text-001
code-search-babbage-code-001
code-search-babbage-text-001
text-embedding-ada-002
text-embedding-ada-002-2
gptv-0329
gpt-4v-1015
dall-e-3
gpt-35-turbo
gpt-35-turbo-instruct
gpt-35-turbo-16k
gpt-4
gpt-4-32k
text-embedding-ada-002
gpt-35-turbo-0613.ft-505d5a8bd321406dbf4605b636b0c0cd
gpt-35-turbo-0613.ft-1a7faac8856d46e48a038c02555fe6e5


In [25]:
# Create the fine-tuning job for the training file uploaded above.
# The ID is taken from the upload response (`file.id`) instead of being pasted
# in as a literal, so this cell stays correct when the file is re-uploaded or
# the notebook is run against a different resource.
ft = client.fine_tuning.jobs.create(
    training_file=file.id,
    model="gpt-35-turbo-0613",
    hyperparameters={
        "n_epochs": 3
    },
    suffix="dog-emoji"
)

# `ft.id` identifies the job in the status and event calls further down.
print("Finetuning job ID:", ft.id)

Finetuning job ID: ftjob-367ee1995af740a0bf24876221585f7a


In [26]:
# Print one line per fine-tuning job returned by the service: its ID followed
# by its current status.
ft_jobs = client.fine_tuning.jobs.list()

for ft_job in ft_jobs:
    print(f"{ft_job.id} {ft_job.status}")

ftjob-367ee1995af740a0bf24876221585f7a pending
ftjob-c41a9dc551834a1aa0be8befe788a22b running
ftjob-1a7faac8856d46e48a038c02555fe6e5 succeeded
ftjob-505d5a8bd321406dbf4605b636b0c0cd succeeded


In [27]:
# List events for the fine-tuning job created earlier in this notebook.
# The job is referenced through `ft.id` rather than a pasted literal so the
# cell follows whichever job the create step actually started.
# NOTE(review): `limit=2` is passed to the request; iterating the returned page
# object may fetch further pages automatically - confirm if a hard cap of two
# events is required.
ft_job_events = client.fine_tuning.jobs.list_events(
    fine_tuning_job_id=ft.id,
    limit=2
)

for ft_job_event in ft_job_events:
    print(ft_job_event.id, ft_job_event.message)

ftevent-1e89dc7cc62046048bcea50de1cccbb9 Jobs ahead in queue: 1
ftevent-42649f5c7677472f83eaa6cd4cde0dba Job enqueued. Waiting for jobs ahead to complete.


In [None]:
# Track training status of the fine-tuning job created earlier in this notebook.
# The ID comes from the create response (`ft.id`) so the cell always follows the
# job this notebook started rather than a literal pasted from a previous run.
job = ft.id

from IPython.display import clear_output

# A job in any of these states will not change again. Cancellation is included
# (both spellings) because otherwise a cancelled job would keep this loop
# polling forever.
terminal_statuses = ["succeeded", "failed", "cancelled", "canceled"]

start_time = time.time()

# Get the status of our fine-tuning job.
ft_job = client.fine_tuning.jobs.retrieve(job)
status = ft_job.status

# If the job isn't done yet, poll it every 10 seconds.
while status not in terminal_statuses:
    time.sleep(10)

    ft_job = client.fine_tuning.jobs.retrieve(job)
    print(ft_job)
    status = ft_job.status

    # Take a single clock reading so minutes and seconds are always consistent
    # with each other (two separate readings can straddle a minute boundary).
    minutes, seconds = divmod(int(time.time() - start_time), 60)
    print("Elapsed time: {} minutes {} seconds".format(minutes, seconds))
    print(f'Status: {status}')

    # wait=True defers the clear until the next output arrives, so the latest
    # status stays visible between polls instead of flickering.
    clear_output(wait=True)

print(f'Fine-tuning job {job} finished with status: {status}')

# List all fine-tuning jobs for this resource.
print('Checking other fine-tune jobs for this resource.')
ft_jobs = client.fine_tuning.jobs.list()

for ft_job in ft_jobs:
    print(ft_job.id, ft_job.status)
