In [1]:
import openai
from openai import cli
import os
from dotenv import load_dotenv

In [2]:
# Configure the openai SDK to talk to an Azure OpenAI resource.
# load_dotenv() copies variables from a local .env file (if present) into the
# process environment; without this call the imported helper is never used and
# os.getenv only sees variables that were already exported by the shell.
load_dotenv()

openai.api_version = '2022-12-01'
openai.api_base = os.getenv("EAST_US_OPENAI_API_BASE")
openai.api_type = 'azure'
openai.api_key = os.getenv("EAST_US_OPENAI_API_KEY")

# os.getenv returns None for a missing variable; fail here with a clear
# message instead of letting a later API call fail in a confusing way.
if not openai.api_base or not openai.api_key:
    raise EnvironmentError(
        "EAST_US_OPENAI_API_BASE and EAST_US_OPENAI_API_KEY must be set "
        "(in the environment or in a .env file)."
    )

In [3]:
# Create the training and validation data files
import json
import shutil

training_file_name = 'training.jsonl'
validation_file_name = 'validation.jsonl'

# Three toy prompt/completion pairs; each one becomes a single JSONL record.
sample_data = [
    {"prompt": "When I go to the store, I want an", "completion": "apple."},
    {"prompt": "When I go to work, I want a", "completion": "coffee."},
    {"prompt": "When I go home, I want a", "completion": "soda."},
]

print(f'Generating the training file: {training_file_name}')
# Serialise every record to one line of JSON, each terminated by a newline.
jsonl_text = ''.join(json.dumps(record) + '\n' for record in sample_data)
with open(training_file_name, 'w') as out_handle:
    out_handle.write(jsonl_text)

# The validation set is simply a byte-for-byte copy of the training set.
print('Copying the training file to the validation file')
shutil.copy(training_file_name, validation_file_name)

Generating the training file: training.jsonl
Copying the training file to the validation file


'validation.jsonl'

In [5]:
# # Check whether the training and validation files have already been uploaded to OpenAI
# print('Checking for existing uploaded files.')
# results = []
# files = openai.File.list().data
# print(f'Found {len(files)} total uploaded files in the subscription.')
# for item in files:
#     if item["filename"] in [training_file_name, validation_file_name]:
#         results.append(item["id"])
# print(f'Found {len(results)} already uploaded files that match our names.')

Checking for existing uploaded files.
Found 3 total uploaded files in the subscription.
Found 2 already uploaded files that match our names.


In [6]:
# # Delete the training and validation files if they have already been uploaded to OpenAI
# print(f'Deleting already uploaded files...')
# for id in results:
#     openai.File.delete(sid = id)

Deleting already uploaded files...


In [7]:
# Upload the files to OpenAI and check the upload status
import time

def check_status(training_id, validation_id):
    """Look up and print the current status of the two uploaded files.

    Args:
        training_id: ID of the uploaded training file.
        validation_id: ID of the uploaded validation file.

    Returns:
        A ``(train_status, valid_status)`` tuple holding the ``"status"``
        field of each retrieved file, training file first.
    """
    file_ids = (training_id, validation_id)
    # One retrieve call per file, training file first.
    train_state, valid_state = (
        openai.File.retrieve(file_id)["status"] for file_id in file_ids
    )
    print(f'Status (training_file | validation_file): {train_state} | {valid_state}')
    return train_state, valid_state

# Upload our two files.
# NOTE(review): _get_or_upload is a private helper of the openai CLI module
# (leading underscore), so it may change between SDK versions. The second
# positional argument is presumably "check whether the file already exists"
# - confirm against the installed openai version.
training_id = cli.FineTune._get_or_upload(training_file_name, True)
validation_id = cli.FineTune._get_or_upload(validation_file_name, True)

# States after which polling can stop. "canceled" is included so the loop
# cannot spin forever if an import is canceled on the service side.
terminal_file_states = ("succeeded", "failed", "canceled")

# Check the status of the imports, polling once per second until both files
# have reached a terminal state.
(train_status, valid_status) = check_status(training_id, validation_id)

while train_status not in terminal_file_states or valid_status not in terminal_file_states:
    time.sleep(1)
    (train_status, valid_status) = check_status(training_id, validation_id)

# Stop here when an import did not succeed: the cells below would otherwise
# start a fine-tune job against a file the service could not process.
if train_status != "succeeded" or valid_status != "succeeded":
    raise RuntimeError(
        f'File import did not succeed (training_file: {train_status}, '
        f'validation_file: {valid_status}).'
    )

Upload progress: 100%|██████████| 204/204 [00:00<00:00, 205kit/s]


Uploaded file from training.jsonl: file-9f319918c6854cf192e2fb9c2afa2f00


Upload progress: 100%|██████████| 204/204 [00:00<00:00, 205kit/s]


Uploaded file from validation.jsonl: file-c80354f9fdc14b60a8eef8638db793d5
Status (training_file | validation_file): running | notRunning
Status (training_file | validation_file): running | notRunning
Status (training_file | validation_file): succeeded | running
Status (training_file | validation_file): succeeded | running
Status (training_file | validation_file): succeeded | running
Status (training_file | validation_file): succeeded | succeeded


In [8]:
# print(f'Downloading training file: {training_id}')
# result = openai.File.download(training_id)
# print(result.decode('utf-8'))

Downloading training file: file-9f319918c6854cf192e2fb9c2afa2f00
{"prompt": "When I go to the store, I want an", "completion": "apple."}
{"prompt": "When I go to work, I want a", "completion": "coffee."}
{"prompt": "When I go home, I want a", "completion": "soda."}



In [9]:
# Train the model: submit a fine-tune job for the uploaded files.
create_args = dict(
    training_file=training_id,
    validation_file=validation_id,
    model="babbage",
    compute_classification_metrics=True,
    classification_n_classes=3,
    n_epochs=20,
    batch_size=3,
    learning_rate_multiplier=0.3,
)
resp = openai.FineTune.create(**create_args)

# Keep the job ID (used by the later cells) and the initial job status.
job_id, status = resp["id"], resp["status"]

print(f'Fine-tunning model with jobID: {job_id}.')

Fine-tunning model with jobID: ft-e0ef93edeec84e50b266d02b3a6106b4.


In [10]:
# import signal
# import datetime

# def signal_handler(sig, frame):
#     status = openai.FineTune.retrieve(job_id).status
#     print(f"Stream interrupted. Job is still {status}.")
#     return

# print(f'Streaming events for the fine-tuning job: {job_id}')
# signal.signal(signal.SIGINT, signal_handler)

# events = openai.FineTune.stream_events(job_id)
# try:
#     for event in events:
#         print(f'{datetime.datetime.fromtimestamp(event["created_at"])} {event["message"]}')

# except Exception:
#     print("Stream interrupted (client disconnected).")

Streaming events for the fine-tuning job: ft-e0ef93edeec84e50b266d02b3a6106b4
2023-04-21 13:47:24 Job enqueued. Waiting for jobs ahead to complete.
2023-04-21 13:47:43 Job started.
2023-04-21 13:48:39 Preprocessing started.
Stream interrupted (client disconnected).


In [11]:
# Check the fine-tune job status and wait until the job has finished.
# "canceled" is treated as terminal as well; with only succeeded/failed the
# loop below would never end for a job that was canceled.
terminal_job_states = ("succeeded", "failed", "canceled")

status = openai.FineTune.retrieve(id=job_id)["status"]
if status not in terminal_job_states:
    print(f'Job not in terminal status: {status}. Waiting.')
    while status not in terminal_job_states:
        time.sleep(2)  # poll every two seconds
        status = openai.FineTune.retrieve(id=job_id)["status"]
        print(f'Status: {status}')

# Report the final state on both paths (previously this was only printed
# when the job had already finished before the cell ran).
print(f'Finetune job {job_id} finished with status: {status}')

print('Checking other finetune jobs in the subscription.')
result = openai.FineTune.list()
print(f'Found {len(result.data)} finetune jobs.')

Job not in terminal status: running. Waiting.
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: running
Status: ru

KeyboardInterrupt: 

In [27]:
# Deploy the model produced by the fine-tune job.
result = openai.FineTune.retrieve(id=job_id)
if result["status"] != 'succeeded':
    # Without this guard `model` would be undefined (NameError) or, worse,
    # a stale value left in the kernel by an earlier run would be deployed.
    raise RuntimeError(
        f'Finetune job {job_id} has status {result["status"]!r}; '
        'a deployment can only be created from a succeeded job.'
    )
model = result["fine_tuned_model"]

print(f'Creating a new deployment with model: {model}')
result = openai.Deployment.create(model=model, scale_settings={"scale_type":"standard"})
# The deployment ID is what the later status-check and completion cells use.
deployment_id = result["id"]

Creating a new deployment with model: babbage.ft-fdb80b3a305843979b8508c55f387567


In [29]:
# Check the deployment status and wait until provisioning has finished.
# A single check is not enough: a completion request sent while the
# deployment is still being provisioned is rejected by the service.
print(f'Checking for deployment status.')
terminal_deployment_states = ("succeeded", "failed", "canceled")

resp = openai.Deployment.retrieve(id=deployment_id)
status = resp["status"]
while status not in terminal_deployment_states:
    print(f'Deployment {deployment_id} is not ready yet (status: {status}). Waiting.')
    time.sleep(5)  # `time` is imported in the upload cell earlier in the notebook
    resp = openai.Deployment.retrieve(id=deployment_id)
    status = resp["status"]

print(f'Deployment {deployment_id} is with status: {status}')

Checking for deployment status.
Deployment deployment-e1eed3749c154a5c88158f62231b35be is with status: failed


In [28]:
# Send one test prompt to the deployed model and print the completed sentence.
print('Sending a test completion job')
start_phrase = 'When I go home, I want a'
response = openai.Completion.create(
    deployment_id=deployment_id,
    prompt=start_phrase,
    temperature=0,
    stop=".",
)

# Take the first choice, drop newlines, close up a space before a period,
# and trim surrounding whitespace.
raw_text = response['choices'][0]['text']
text = raw_text.replace('\n', '').replace(' .', '.').strip()
print(f'"{start_phrase} {text}."')

Sending a test completion job


InvalidRequestError: The API deployment for the resource is not ready, please wait until provisioningState becomes Succeeded.