In [1]:
import json

training_data = [{"prompt": "빨강이", "completion": "Data Scientist"},
                 {"prompt": "주황이", "completion": "Developer"},
                 {"prompt": "노랑이", "completion": "Developer"},
                 {"prompt": "초록이", "completion": "Developer"},
                 {"prompt": "파랑이", "completion": "Developer"},
                 {"prompt": "검둥이", "completion": "Data Scientist"},
                 {"prompt": "보랑이", "completion": "Developer"},
                 {"prompt": "남둥이", "completion": "Developer"},
                 {"prompt": "하늘이", "completion": "Data Scientist"},
                 {"prompt": "개나리", "completion": "Data Scientist"}] 

In [2]:
# training_data의 각 항목을 JSONL 형식으로 train_data.jsonl 파일에 기록

file_name = 'train_data.jsonl'

with open(file_name, "w", encoding="utf-8") as output_file:
    for entry in training_data:
        json.dump(entry, output_file)
        output_file.write("\n")

In [3]:
# OpenAI의 API를 사용하여 특정 데이터 파일을 업로드하고, 이를 사용해 모델을 파인튜닝(fine-tuning)하려는 작업을 수행하는 코드
from openai import OpenAI

# OpenAI 클라이언트 생성
client = OpenAI(api_key="API KEY")

# 파일 업로드 및 파인튜닝 준비
upload_response = client.files.create(
    file=open(file_name, 'rb'),
    purpose='fine-tune'
)

# 업로드된 파일 ID
# https://platform.openai.com/storage/files/file-zKUjo5EIarZZMfgHyBZwbidn 에서도 확인 가능
file_id = upload_response.id

In [4]:
file_id

'file-5kEL7RKoV5HwHhmMLcRYdi7R'

In [6]:
# Fine-Tuning 모델 생성
fine_tune_model = client.fine_tuning.jobs.create(
    training_file=file_id,
    model="davinci-002"
)

fine_tune_model

FineTuningJob(id='ftjob-1x6aeNlDYprbCUHjEzZKpQvV', created_at=1724841237, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='davinci-002', object='fine_tuning.job', organization_id='org-RB4JrJn9lUm6P7io84Be0PH4', result_files=[], seed=444241918, status='validating_files', trained_tokens=None, training_file='file-5kEL7RKoV5HwHhmMLcRYdi7R', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)

In [8]:
# Fine-Tuning job 리스트 10개 나열
client.fine_tuning.jobs.list(limit=10)

# Fine-Tuning 상태 확인
client.fine_tuning.jobs.retrieve("ftjob-1x6aeNlDYprbCUHjEzZKpQvV") 

FineTuningJob(id='ftjob-1x6aeNlDYprbCUHjEzZKpQvV', created_at=1724841237, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=10, batch_size=1, learning_rate_multiplier=16), model='davinci-002', object='fine_tuning.job', organization_id='org-RB4JrJn9lUm6P7io84Be0PH4', result_files=[], seed=444241918, status='running', trained_tokens=None, training_file='file-5kEL7RKoV5HwHhmMLcRYdi7R', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)

In [11]:
# 위 status=succeeded 상태이면 추가학습이 완성되어 모델이 생성된 상태
# Fine-Tuning job에서 10개의 이벤트 나열
fine_tune_events = client.fine_tuning.jobs.list_events(fine_tuning_job_id=fine_tune_model.id)
fine_tune_events

SyncCursorPage[FineTuningJobEvent](data=[FineTuningJobEvent(id='ftevent-tRmfqSrZpD6GSQzJP5oFFVZn', created_at=1724841554, level='info', message='The job has successfully completed', object='fine_tuning.job.event', data={}, type='message'), FineTuningJobEvent(id='ftevent-5LJaCa0eq5gMylSVyPRdnvuw', created_at=1724841554, level='info', message='The job has successfully completed', object='fine_tuning.job.event', data={}, type='message'), FineTuningJobEvent(id='ftevent-uBcge7XotAJWPJL2izwUhfsX', created_at=1724841544, level='info', message='New fine-tuned model created', object='fine_tuning.job.event', data={}, type='message'), FineTuningJobEvent(id='ftevent-93yFYGNGFkdhG4QbsG3z3Izk', created_at=1724841543, level='info', message='Checkpoint created at step 90', object='fine_tuning.job.event', data={}, type='message'), FineTuningJobEvent(id='ftevent-oK4MmRV659WJjx17kP4mdueo', created_at=1724841543, level='info', message='Checkpoint created at step 80', object='fine_tuning.job.event', data={

In [12]:
# Fine-Tuning한 모델 사용
# DashBoard에서 Job ID 가져옴
completion = client.completions.create(
    model="ft:davinci-002:personal::A1AMpVE0",
    prompt="검둥이는?")

print(completion.choices[0].text)

# 학습데이터가 많이 없어서 Developer라는 답변이 있을 수 있다.

Developer ScientistData ScientistData ScientistData ScientistData ScientistData ScientistData ScientistData Scientist


In [13]:
completion = client.completions.create(
    model="ft:davinci-002:personal::A1AMpVE0",
    prompt="주황이는?")

print(completion.choices[0].text)

DeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloperDeveloper
