In [3]:
import openai
from openai import cli
import os
from dotenv import load_dotenv

In [4]:
# Load variables from a local .env file, if one exists. By default this does
# not override variables that are already set in the process environment.
load_dotenv()

# Point the openai client at the Azure OpenAI resource.
openai.api_type = 'azure'
openai.api_version = '2022-12-01'
openai.api_base = os.getenv("EAST_US_OPENAI_API_BASE")
openai.api_key = os.getenv("EAST_US_OPENAI_API_KEY")

# os.getenv returns None for unset variables; fail here with a clear message
# rather than with an obscure error on the first API call.
_missing_settings = [
    env_name
    for env_name, env_value in (
        ("EAST_US_OPENAI_API_BASE", openai.api_base),
        ("EAST_US_OPENAI_API_KEY", openai.api_key),
    )
    if not env_value
]
if _missing_settings:
    raise EnvironmentError(
        f"Missing required environment variable(s): {', '.join(_missing_settings)}"
    )

In [5]:
# Create the training and validation data files
import shutil
import json

# Names of the JSONL files written by this cell.
training_file_name = 'training.jsonl'
validation_file_name = 'validation.jsonl'

# Small prompt/completion sample set; each entry becomes one line of the JSONL file.
sample_data = [
    {"prompt": "When I go to the store, I want an", "completion": "apple."},
    {"prompt": "When I go to work, I want a", "completion": "coffee."},
    {"prompt": "When I go home, I want a", "completion": "soda."},
]

print(f'Generating the training file: {training_file_name}')
with open(training_file_name, 'w') as training_file:
    # One JSON document per line.
    training_file.writelines(json.dumps(entry) + '\n' for entry in sample_data)

# The validation file is a plain copy of the training file.
print('Copying the training file to the validation file')
shutil.copy(training_file_name, validation_file_name)

Generating the training file: training.jsonl
Copying the training file to the validation file


'validation.jsonl'

In [None]:
# # Check whether the training and validation files have already been uploaded to OpenAI
# print('Checking for existing uploaded files.')
# results = []
# files = openai.File.list().data
# print(f'Found {len(files)} total uploaded files in the subscription.')
# for item in files:
#     if item["filename"] in [training_file_name, validation_file_name]:
#         results.append(item["id"])
# print(f'Found {len(results)} already uploaded files that match our names.')

In [None]:
# print(f'Deleting already uploaded files...')
# for id in results:
#     openai.File.delete(sid = id)

In [6]:
# Upload the files to OpenAI and check the upload status
import time

def check_status(training_id, validation_id):
    """Retrieve and print the current status of the two uploaded files.

    Args:
        training_id: ID of the uploaded training file.
        validation_id: ID of the uploaded validation file.

    Returns:
        A ``(train_status, valid_status)`` tuple holding the ``"status"``
        field of each retrieved file, training file first.
    """
    statuses = tuple(
        openai.File.retrieve(file_id)["status"]
        for file_id in (training_id, validation_id)
    )
    print(f'Status (training_file | validation_file): {statuses[0]} | {statuses[1]}')
    return statuses

# File states after which the status can no longer change. Checking only
# "succeeded" and "failed" would keep the loop below polling forever if a
# file ended up "canceled" or "deleted".
TERMINAL_FILE_STATES = ("succeeded", "failed", "canceled", "deleted")

# Import our two files.
# NOTE(review): _get_or_upload is a private helper of the openai CLI module;
# presumably the second argument makes it reuse an already uploaded file with
# the same name — confirm against the installed openai version.
training_id = cli.FineTune._get_or_upload(training_file_name, True)
validation_id = cli.FineTune._get_or_upload(validation_file_name, True)

# Poll once per second until both imports have reached a terminal state.
(train_status, valid_status) = check_status(training_id, validation_id)

while train_status not in TERMINAL_FILE_STATES or valid_status not in TERMINAL_FILE_STATES:
    time.sleep(1)
    (train_status, valid_status) = check_status(training_id, validation_id)

# Stop here when an import did not succeed, instead of letting a later cell
# fail with a less obvious error.
if (train_status, valid_status) != ("succeeded", "succeeded"):
    raise RuntimeError(
        f'File import did not succeed (training_file | validation_file): '
        f'{train_status} | {valid_status}'
    )

Upload progress: 100%|██████████| 204/204 [00:00<00:00, 190kit/s]


Uploaded file from training.jsonl: file-acf1d891d0d14344b236b1a4f2218ea6


Upload progress: 100%|██████████| 204/204 [00:00<?, ?it/s] 


Uploaded file from validation.jsonl: file-23d100935c184a3ab06ea2f63ecbd9cb
Status (training_file | validation_file): notRunning | notRunning
Status (training_file | validation_file): running | running
Status (training_file | validation_file): running | running
Status (training_file | validation_file): running | running
Status (training_file | validation_file): succeeded | running
Status (training_file | validation_file): succeeded | succeeded


In [None]:
# Download the uploaded training file and print its contents.
print(f'Downloading training file: {training_id}')
result = openai.File.download(training_id)
training_text = result.decode('utf-8')
print(training_text)

In [None]:
# Settings of the fine-tuning job; passed to openai.FineTune.create as keyword arguments.
create_args = dict(
    training_file=training_id,
    validation_file=validation_id,
    model="babbage",
    compute_classification_metrics=True,
    classification_n_classes=3,
    n_epochs=20,
    batch_size=3,
    learning_rate_multiplier=0.3,
)

# Submit the job and keep its ID and the status reported at creation time.
resp = openai.FineTune.create(**create_args)
job_id, status = resp["id"], resp["status"]

print(f'Fine-tunning model with jobID: {job_id}.')

In [None]:
import signal
import datetime

def signal_handler(sig, frame):
    """SIGINT handler: report the job's current status, then stop the event stream.

    Args:
        sig: Signal number passed in by the ``signal`` module (unused).
        frame: Current stack frame passed in by the ``signal`` module (unused).

    Raises:
        KeyboardInterrupt: Always, after printing the status. A handler that
            merely returns lets the streaming loop continue, which contradicts
            the "Stream interrupted" message.
    """
    status = openai.FineTune.retrieve(job_id).status
    print(f"Stream interrupted. Job is still {status}.")
    raise KeyboardInterrupt


print(f'Streaming events for the fine-tuning job: {job_id}')
# Keep the handler that was installed before, so it can be put back afterwards;
# otherwise every later interrupt would still be routed to signal_handler.
previous_sigint_handler = signal.signal(signal.SIGINT, signal_handler)

try:
    events = openai.FineTune.stream_events(job_id)
    try:
        for event in events:
            print(f'{datetime.datetime.fromtimestamp(event["created_at"])} {event["message"]}')

    except KeyboardInterrupt:
        # Raised by signal_handler, which has already printed the job status.
        pass
    except Exception as exc:
        # Include the underlying error so a real failure is not hidden.
        print(f"Stream interrupted (client disconnected): {exc!r}")
finally:
    # signal.signal returns None when the previous handler was not installed
    # from Python; None cannot be passed back to signal.signal.
    if previous_sigint_handler is not None:
        signal.signal(signal.SIGINT, previous_sigint_handler)

In [None]:
# Job states after which the status can no longer change. Checking only
# "succeeded" and "failed" would keep the loop below polling forever if the
# job ended up "canceled" or "deleted".
TERMINAL_JOB_STATES = ("succeeded", "failed", "canceled", "deleted")

status = openai.FineTune.retrieve(id=job_id)["status"]
if status not in TERMINAL_JOB_STATES:
    print(f'Job not in terminal status: {status}. Waiting.')
    # Poll every two seconds until the job reaches a terminal state.
    while status not in TERMINAL_JOB_STATES:
        time.sleep(2)
        status = openai.FineTune.retrieve(id=job_id)["status"]
        print(f'Status: {status}')
else:
    print(f'Finetune job {job_id} finished with status: {status}')

# Report how many fine-tune jobs the list call returns in total.
print('Checking other finetune jobs in the subscription.')
result = openai.FineTune.list()
print(f'Found {len(result.data)} finetune jobs.')

In [None]:
# First let's get the model of the previous job.
result = openai.FineTune.retrieve(id=job_id)
if result["status"] != 'succeeded':
    # Without this guard `model` would be undefined (NameError) on a fresh
    # kernel, or silently keep a stale value left over from an earlier run.
    raise RuntimeError(
        f'Cannot create a deployment: fine-tune job {job_id} has status {result["status"]}.'
    )
model = result["fine_tuned_model"]

# Now let's create the deployment
print(f'Creating a new deployment with model: {model}')
result = openai.Deployment.create(model=model, scale_settings={"scale_type":"standard"})
deployment_id = result["id"]

In [None]:
# Retrieve the deployment identified by deployment_id and report its current status.
print('Checking for deployment status.')
resp = openai.Deployment.retrieve(id=deployment_id)
status = resp["status"]
print(f'Deployment {deployment_id} is with status: {status}')

In [None]:
print('While deployment running, selecting a completed one.')
result = openai.Deployment.list()

# Take the ID of the first listed deployment whose status is "succeeded";
# deployment_id stays None when there is no such deployment.
deployment_id = next(
    (entry["id"] for entry in result.data if entry["status"] == "succeeded"),
    None,
)

if deployment_id:
    print(f'Found a successful deployment with id: {deployment_id}.')
else:
    print('No deployment with status: succeeded found.')

In [None]:
# Send a test prompt to the selected deployment and print the completed sentence.
print('Sending a test completion job')
start_phrase = 'When I go home, I want a'
response = openai.Completion.create(
    deployment_id=deployment_id,
    prompt=start_phrase,
    temperature=0,
    stop=".",
)

# Tidy the first choice's text: drop newlines, close up any " ." and trim
# surrounding whitespace.
raw_text = response['choices'][0]['text']
text = raw_text.replace('\n', '').replace(' .', '.').strip()
print(f'"{start_phrase} {text}."')