In [28]:
import os
import json
import dotenv
import openai
from datetime import date
import requests
from openai import AzureOpenAI
from typing import Optional



dotenv.load_dotenv()

endpoint = os.environ.get("FTQ_AZURE_OPENAI_ENDPOINT")
api_key = os.environ.get("FTQ_AZURE_OPENAI_API_KEY")
deployment = os.environ.get("FTQ_AZURE_OPENAI_DEPLOYMENT_NAME")
api_version = os.environ.get("FTQ_AZURE_OPENAI_API_VERSION")

client = openai.AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=api_key,
    api_version=api_version,
)

In [29]:
# Run preliminary checks

import json

# Load the training set
with open('training_set.jsonl', 'r', encoding='utf-8') as f:
    training_dataset = [json.loads(line) for line in f]

# Training dataset stats
print("Number of examples in training set:", len(training_dataset))
print("First example in training set:")
for message in training_dataset[0]["messages"]:
    print(message)

# Load the validation set
with open('validation_set.jsonl', 'r', encoding='utf-8') as f:
    validation_dataset = [json.loads(line) for line in f]

# Validation dataset stats
print("\nNumber of examples in validation set:", len(validation_dataset))
print("First example in validation set:")
for message in validation_dataset[0]["messages"]:
    print(message)

Number of examples in training set: 10
First example in training set:
{'role': 'system', 'content': 'Clippy is a factual chatbot that is also sarcastic.'}
{'role': 'user', 'content': 'Who discovered Antarctica?'}
{'role': 'assistant', 'content': "Some chaps named Fabian Gottlieb von Bellingshausen and Mikhail Lazarev, as if they don't teach that in every school!"}

Number of examples in validation set: 10
First example in validation set:
{'role': 'system', 'content': 'Clippy is a factual chatbot that is also sarcastic.'}
{'role': 'user', 'content': "What's the capital of Australia?"}
{'role': 'assistant', 'content': "It's Canberra, not Sydney. Shocking, I know!"}


In [30]:
# Validate token counts

import json
import tiktoken
import numpy as np
from collections import defaultdict

encoding = tiktoken.get_encoding("o200k_base") # default encoding for gpt-4o models. This requires the latest version of tiktoken to be installed.

def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3
    return num_tokens

def num_assistant_tokens_from_messages(messages):
    num_tokens = 0
    for message in messages:
        if message["role"] == "assistant":
            num_tokens += len(encoding.encode(message["content"]))
    return num_tokens

def print_distribution(values, name):
    print(f"\n#### Distribution of {name}:")
    print(f"min / max: {min(values)}, {max(values)}")
    print(f"mean / median: {np.mean(values)}, {np.median(values)}")
    print(f"p5 / p95: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}")

files = ['training_set.jsonl', 'validation_set.jsonl']

for file in files:
    print(f"Processing file: {file}")
    with open(file, 'r', encoding='utf-8') as f:
        dataset = [json.loads(line) for line in f]

    total_tokens = []
    assistant_tokens = []

    for ex in dataset:
        messages = ex.get("messages", {})
        total_tokens.append(num_tokens_from_messages(messages))
        assistant_tokens.append(num_assistant_tokens_from_messages(messages))

    print_distribution(total_tokens, "total tokens")
    print_distribution(assistant_tokens, "assistant tokens")
    print('*' * 50)

Processing file: training_set.jsonl

#### Distribution of total tokens:
min / max: 46, 59
mean / median: 49.8, 48.5
p5 / p95: 46.0, 53.599999999999994

#### Distribution of assistant tokens:
min / max: 13, 28
mean / median: 16.5, 14.0
p5 / p95: 13.0, 19.9
**************************************************
Processing file: validation_set.jsonl

#### Distribution of total tokens:
min / max: 41, 64
mean / median: 48.9, 47.0
p5 / p95: 43.7, 54.099999999999994

#### Distribution of assistant tokens:
min / max: 8, 29
mean / median: 15.0, 12.5
p5 / p95: 10.7, 19.999999999999996
**************************************************


In [31]:
training_file_name = 'training_set.jsonl'
validation_file_name = 'validation_set.jsonl'

# Upload the training and validation dataset files to Azure OpenAI with the SDK.

training_response = client.files.create(
    file = open(training_file_name, "rb"), purpose="fine-tune"
)
training_file_id = training_response.id

validation_response = client.files.create(
    file = open(validation_file_name, "rb"), purpose="fine-tune"
)
validation_file_id = validation_response.id

print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-a2dda6c9c2fe43639eabd61f0a9d0167
Validation file ID: file-2bf0bf40e65d41c287f6252094aa144d


In [36]:
# Submit fine-tuning training job

response = client.fine_tuning.jobs.create(
    training_file = training_file_id,
    validation_file = validation_file_id,
    model = "gpt-35-turbo-0125", # Enter base model name. Note that in Azure OpenAI the model name contains dashes and cannot contain dot/period characters.
    seed = 105 # seed parameter controls reproducibility of the fine-tuning job. If no seed is specified one will be generated automatically.
)

job_id = response.id

# You can use the job ID to monitor the status of the fine-tuning job.
# The fine-tuning job will take some time to start and complete.

print("Job ID:", response.id)
print("Status:", response.status)
print(response.model_dump_json(indent=2))

Job ID: ftjob-08832bd28a9b411f816e6146af2b6bf0
Status: pending
{
  "id": "ftjob-08832bd28a9b411f816e6146af2b6bf0",
  "created_at": 1729389163,
  "error": null,
  "fine_tuned_model": null,
  "finished_at": null,
  "hyperparameters": {
    "n_epochs": -1,
    "batch_size": -1,
    "learning_rate_multiplier": 1
  },
  "model": "gpt-35-turbo-0125",
  "object": "fine_tuning.job",
  "organization_id": null,
  "result_files": null,
  "seed": 105,
  "status": "pending",
  "trained_tokens": null,
  "training_file": "file-a2dda6c9c2fe43639eabd61f0a9d0167",
  "validation_file": "file-2bf0bf40e65d41c287f6252094aa144d",
  "estimated_finish": null,
  "integrations": null
}


In [37]:
# Track training status

from IPython.display import clear_output
import time

start_time = time.time()

# Get the status of our fine-tuning job.
response = client.fine_tuning.jobs.retrieve(job_id)

status = response.status

# If the job isn't done yet, poll it every 10 seconds.
while status not in ["succeeded", "failed"]:
    time.sleep(10)

    response = client.fine_tuning.jobs.retrieve(job_id)
    print(response.model_dump_json(indent=2))
    print("Elapsed time: {} minutes {} seconds".format(int((time.time() - start_time) // 60), int((time.time() - start_time) % 60)))
    status = response.status
    print(f'Status: {status}')
    clear_output(wait=True)

print(f'Fine-tuning job {job_id} finished with status: {status}')

# List all fine-tuning jobs for this resource.
print('Checking other fine-tune jobs for this resource.')
response = client.fine_tuning.jobs.list()
print(f'Found {len(response.data)} fine-tune jobs.')

Fine-tuning job ftjob-08832bd28a9b411f816e6146af2b6bf0 finished with status: succeeded
Checking other fine-tune jobs for this resource.
Found 1 fine-tune jobs.


In [38]:
#List fine-tuning events
#API version: 2024-08-01-preview or later is required for this command.

#While not necessary to complete fine-tuning it can be helpful to examine the individual fine-tuning events that were generated during training. 
#The full training results can also be examined after training is complete in the training results file.

response = client.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10)
print(response.model_dump_json(indent=2))

{
  "data": [
    {
      "id": "ftevent-df29c4eb48a1446cb29f4e320ef7b5a7",
      "created_at": 1729391315,
      "level": "info",
      "message": "Training tokens billed: 6000",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-17ea47ae9fac4738b95967e96552b905",
      "created_at": 1729391315,
      "level": "info",
      "message": "Completed results file: file-706c75feef63450285e4a4ac2f2fe587",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-f280eba83ad644c5baa6ab1944e95393",
      "created_at": 1729391262,
      "level": "info",
      "message": "Job succeeded.",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-008dcf0ae077743008dcf0ae07774300",
      "created_at": 1729390942,
      "level": "info",
      "message": "Step 100: training loss=0.11779531091451645",
      "object": "fine_tuning.job.event",
      "type": "metrics",
      

In [39]:
#List checkpoints
#API version: 2024-08-01-preview or later is required for this command.

#When each training epoch completes a checkpoint is generated.
#A checkpoint is a fully functional version of a model which can both be deployed and used as the target model for subsequent fine-tuning jobs.
#Checkpoints can be particularly useful, as they can provide a snapshot of your model prior to overfitting having occurred. 
#When a fine-tuning job completes you will have the three most recent versions of the model available to deploy. 
#The final epoch will be represented by your fine-tuned model, the previous two epochs will be available as checkpoints.

response = client.fine_tuning.jobs.checkpoints.list(job_id)
print(response.model_dump_json(indent=2))

{
  "data": [
    {
      "id": "ftchkpt-52916a1b07de49cb8e7c57ca3d864e22",
      "created_at": 1729391006,
      "fine_tuned_model_checkpoint": "gpt-35-turbo-0125.ft-08832bd28a9b411f816e6146af2b6bf0",
      "fine_tuning_job_id": "ftjob-08832bd28a9b411f816e6146af2b6bf0",
      "metrics": {
        "full_valid_loss": 1.1610899983837617,
        "full_valid_mean_token_accuracy": 0.7262569832402235,
        "step": 100.0,
        "train_loss": 0.11779531091451645,
        "train_mean_token_accuracy": 1.0,
        "valid_loss": 1.6439884185791016,
        "valid_mean_token_accuracy": 0.7
      },
      "object": "fine_tuning.job.checkpoint",
      "step_number": 100
    },
    {
      "id": "ftchkpt-6fe974c891dd4f358569e685b5d805aa",
      "created_at": 1729390991,
      "fine_tuned_model_checkpoint": "gpt-35-turbo-0125.ft-08832bd28a9b411f816e6146af2b6bf0:ckpt-step-90",
      "fine_tuning_job_id": "ftjob-08832bd28a9b411f816e6146af2b6bf0",
      "metrics": {
        "full_valid_loss": 1.152

In [40]:
#Final training run results
#To get the final results, run the following:
# Retrieve fine_tuned_model name

response = client.fine_tuning.jobs.retrieve(job_id)

print(response.model_dump_json(indent=2))
fine_tuned_model = response.fine_tuned_model

{
  "id": "ftjob-08832bd28a9b411f816e6146af2b6bf0",
  "created_at": 1729389163,
  "error": null,
  "fine_tuned_model": "gpt-35-turbo-0125.ft-08832bd28a9b411f816e6146af2b6bf0",
  "finished_at": 1729391315,
  "hyperparameters": {
    "n_epochs": 10,
    "batch_size": 1,
    "learning_rate_multiplier": 1
  },
  "model": "gpt-35-turbo-0125",
  "object": "fine_tuning.job",
  "organization_id": null,
  "result_files": [
    "file-706c75feef63450285e4a4ac2f2fe587"
  ],
  "seed": 105,
  "status": "succeeded",
  "trained_tokens": 5210,
  "training_file": "file-a2dda6c9c2fe43639eabd61f0a9d0167",
  "validation_file": "file-2bf0bf40e65d41c287f6252094aa144d",
  "estimated_finish": null,
  "integrations": null
}
