# Azure OpenAI Fine-Tuning

### Step 1. Set up Azure OpenAI

In [8]:
import openai
from openai import cli
import os
import shutil
import json
import time
from dotenv import load_dotenv

# Pull in settings via python-dotenv before the environment is read below.
load_dotenv()

# Fail fast when the endpoint or key is missing: os.getenv() would otherwise hand
# None to the client and the problem would only surface on the first API call.
_required_env_vars = ("SOUTH_CENTRAL_OPENAI_API_BASE", "SOUTH_CENTRAL_OPENAI_API_KEY")
_missing_env_vars = [name for name in _required_env_vars if not os.getenv(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

# Point the openai client at the Azure OpenAI resource.
openai.api_type = 'azure'
openai.api_version = '2023-03-15-preview'
openai.api_base = os.getenv("SOUTH_CENTRAL_OPENAI_API_BASE")
openai.api_key = os.getenv("SOUTH_CENTRAL_OPENAI_API_KEY")

### Step 2. 학습 데이터 생성 및 업로드

In [2]:
# Create the training and validation data files.
training_file_name = 'training.jsonl'
validation_file_name = 'validation.jsonl'

# Three prompt/completion pairs, written out below as one JSON object per line.
sample_data = [
    {"prompt": "When I go to the store, I want an", "completion": "apple."},
    {"prompt": "When I go to work, I want a", "completion": "coffee."},
    {"prompt": "When I go home, I want a", "completion": "soda."},
]

print(f'Generating the training file: {training_file_name}')
with open(training_file_name, 'w') as jsonl_out:
    jsonl_out.writelines(json.dumps(record) + '\n' for record in sample_data)

print('Copying the training file to the validation file')
# The validation file is a plain copy of the training file. The call stays the
# last expression of the cell so the destination path is still displayed.
shutil.copy(training_file_name, validation_file_name)

Generating the training file: training.jsonl
Copying the training file to the validation file


'validation.jsonl'

In [3]:
def check_status(training_id, validation_id):
    """Fetch, print and return the status of the two uploaded files.

    Args:
        training_id: ID of the uploaded training file.
        validation_id: ID of the uploaded validation file.

    Returns:
        A ``(train_status, valid_status)`` tuple holding the ``"status"``
        field that ``openai.File.retrieve`` returns for each file.
    """
    # The training file is queried first, then the validation file.
    file_statuses = tuple(
        openai.File.retrieve(file_id)["status"]
        for file_id in (training_id, validation_id)
    )
    print('Status (training_file | validation_file): {} | {}'.format(*file_statuses))
    return file_statuses

# Upload the files to Azure OpenAI.
# NOTE(review): _get_or_upload is a private helper of the openai CLI module and may
# change between library versions; the arguments are passed exactly as before.
training_id = cli.FineTune._get_or_upload(training_file_name, True)
validation_id = cli.FineTune._get_or_upload(validation_file_name, True)

# Poll once per second until both files have reached a terminal status.
terminal_file_statuses = ("succeeded", "failed")
(train_status, valid_status) = check_status(training_id, validation_id)

while train_status not in terminal_file_statuses or valid_status not in terminal_file_statuses:
    time.sleep(1)
    (train_status, valid_status) = check_status(training_id, validation_id)

# The loop also ends when a file has "failed"; stop here with a clear message
# instead of letting the fine-tune step start from an unusable file.
if "failed" in (train_status, valid_status):
    raise RuntimeError(
        f'File processing failed (training_file | validation_file): '
        f'{train_status} | {valid_status}'
    )

Upload progress: 100%|██████████| 204/204 [00:00<00:00, 213kit/s]


Uploaded file from training.jsonl: file-d86e28e64e83435ba5c08afd7f0e4276


Upload progress: 100%|██████████| 204/204 [00:00<00:00, 213kit/s]


Uploaded file from validation.jsonl: file-5f9fb5d9dfe74428995048e11b9b20da
Status (training_file | validation_file): running | notRunning
Status (training_file | validation_file): running | notRunning
Status (training_file | validation_file): succeeded | running
Status (training_file | validation_file): succeeded | running
Status (training_file | validation_file): succeeded | succeeded


### Step 3. Fine-tune

In [None]:
# Train the model: submit a fine-tune job for the uploaded files.
create_args = dict(
    training_file=training_id,
    validation_file=validation_id,
    model="babbage",  # base model
    compute_classification_metrics=True,
    classification_n_classes=3,
    n_epochs=20,
    batch_size=3,
    learning_rate_multiplier=0.3,
)
resp = openai.FineTune.create(**create_args)

# Keep the job ID for the status-check and deployment cells.
job_id, status = resp["id"], resp["status"]

print(f'Fine-tunning model with jobID: {job_id}.')

In [None]:
# Check the status of the fine-tune job and wait until it is finished.
# A job can also end as "canceled"; without that value in the terminal set the
# wait loop below would never exit for a canceled job.
terminal_job_statuses = ("succeeded", "failed", "canceled")

status = openai.FineTune.retrieve(id=job_id)["status"]
if status not in terminal_job_statuses:
    print(f'Job not in terminal status: {status}. Waiting.')
    while status not in terminal_job_statuses:
        time.sleep(2)  # poll every two seconds
        status = openai.FineTune.retrieve(id=job_id)["status"]
        print(f'Status: {status}')
else:
    print(f'Finetune job {job_id} finished with status: {status}')

# List all fine-tune jobs and report how many there are.
print('Checking other finetune jobs in the subscription.')
result = openai.FineTune.list()
print(f'Found {len(result.data)} finetune jobs.')

### Step 4. 모델 배포

In [7]:
# Look up the fine-tune job to obtain the name of the resulting model.
result = openai.FineTune.retrieve(id=job_id)
job_status = result["status"]
if job_status != 'succeeded':
    # Without this guard `model` would be undefined here (NameError), or a stale
    # value from an earlier run of the notebook would be deployed instead.
    raise RuntimeError(
        f'Fine-tune job {job_id} has status {job_status!r}; '
        'a deployment can only be created from a succeeded job.'
    )
model = result["fine_tuned_model"]

# Deploy the model.
print(f'Creating a new deployment with model: {model}')
result = openai.Deployment.create(model=model, scale_settings={"scale_type":"standard"})
deployment_id = result["id"]

Creating a new deployment with model: babbage.ft-14c6f970912842b6a4d028f88f347c64


In [13]:
# Check the deployment status, waiting until the deployment is finished so that the
# completion cell below does not run against a deployment that is still being created.
deployment_terminal_statuses = ("succeeded", "failed", "canceled")
max_status_polls = 60        # bounded wait: give up after 60 polls
poll_interval_seconds = 5

print('Checking for deployment status.')
resp = openai.Deployment.retrieve(id=deployment_id)
status = resp["status"]

polls_done = 0
while status not in deployment_terminal_statuses and polls_done < max_status_polls:
    time.sleep(poll_interval_seconds)
    resp = openai.Deployment.retrieve(id=deployment_id)
    status = resp["status"]
    polls_done += 1

# Report the last observed status (terminal, or the current one if the wait ran out).
print(f'Deployment {deployment_id} is with status: {status}')

Checking for deployment status.
Deployment test is with status: succeeded


In [12]:
print('Sending a test completion job')
start_phrase = 'When I go to the work, I want a'

# Query the deployed model; generation stops at the first "." and temperature is 0.
response = openai.Completion.create(
    deployment_id=deployment_id,
    prompt=start_phrase,
    temperature=0,
    stop=".",
)

# Tidy the first choice: drop newlines, close up " ." and trim outer whitespace.
raw_completion = response['choices'][0]['text']
text = raw_completion.replace('\n', '').replace(' .', '.').strip()
print(f'"{start_phrase} {text}."')

Sending a test completion job
"When I go to the work, I want a coffee."
