下記を参考にした
[How_to_finetune_chat_models.ipynb](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_finetune_chat_models.ipynb)

In [3]:
import json
import openai
import os
import pandas as pd
from pprint import pprint

# Resolve the API key. OPENAI_API_KEY takes precedence and CHATGPT_KEY is only
# a fallback, so the fallback is read with .get(): indexing os.environ directly
# would raise KeyError whenever CHATGPT_KEY is unset, even when OPENAI_API_KEY
# is available.
chatgpt_key = os.environ.get("CHATGPT_KEY")
api_key = os.environ.get("OPENAI_API_KEY", chatgpt_key)
if api_key is None:
    # Same exception type the cell raised before when no key was configured,
    # but now only when *neither* variable is set, and with an actionable message.
    raise KeyError(
        "No API key found: set the OPENAI_API_KEY or CHATGPT_KEY environment variable."
    )
client = openai.OpenAI(api_key=api_key)

In [5]:
# Load the question/answer dataset used to build the fine-tuning examples.
# Later cells read the "Question" and "Answer" columns from this frame.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which suggests
# the CSV was saved together with its index — confirm whether it is needed.
recipe_df = pd.read_csv("./zundamon.csv")

# Preview the first rows to sanity-check what was loaded.
recipe_df.head()

Unnamed: 0,Question,Answer
0,ずんだもんとは,SSS合同会社によるずんだ餅をモチーフにした東北地方のマスコットキャラクター
1,ずんだもんの性別は,女
2,ずんだもんの種族は,ずんだの妖精
3,ずんだもんの特技は,ずんだアローへの変身
4,ずんだもんは誰にデザインされましたか,江戸村ににこ


In [6]:
# Placeholder; reassigned with the real examples in a later cell.
training_data = []

# System prompt shared by every training example and by the test request.
system_message = "You are a helpful assistant. You reply based on each of the facts provided in Japanese."


def prepare_example_conversation(row):
    """Build one chat-format fine-tuning example from a dataset row.

    Args:
        row: A mapping-like object (e.g. a DataFrame row) exposing the
            "Question" and "Answer" keys.

    Returns:
        A dict with a single "messages" key holding, in order, the system
        prompt, the question as the user turn, and the answer as the
        assistant turn.
    """
    return {
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": row["Question"]},
            {"role": "assistant", "content": row["Answer"]},
        ]
    }

# Show the chat-format example built from the first dataset row as a sanity check.
pprint(prepare_example_conversation(recipe_df.iloc[0]))

{'messages': [{'content': 'You are a helpful assistant. You reply based on '
                          'each of the facts provided in Japanese.',
               'role': 'system'},
              {'content': 'ずんだもんとは', 'role': 'user'},
              {'content': 'SSS合同会社によるずんだ餅をモチーフにした東北地方のマスコットキャラクター',
               'role': 'assistant'}]}


In [7]:
# Use the rows labelled 0 through 53 for training. `.loc` label slices include
# both endpoints, so this selects at most 54 rows (not 100), assuming the
# default integer index shown in the preview above.
training_df = recipe_df.loc[0:53]

# apply the prepare_example_conversation function to each row of the training_df
training_data = training_df.apply(prepare_example_conversation, axis=1).tolist()

# Print the first five examples to check their structure.
for example in training_data[:5]:
    print(example)

{'messages': [{'role': 'system', 'content': 'You are a helpful assistant. You reply based on each of the facts provided in Japanese.'}, {'role': 'user', 'content': 'ずんだもんとは'}, {'role': 'assistant', 'content': 'SSS合同会社によるずんだ餅をモチーフにした東北地方のマスコットキャラクター'}]}
{'messages': [{'role': 'system', 'content': 'You are a helpful assistant. You reply based on each of the facts provided in Japanese.'}, {'role': 'user', 'content': 'ずんだもんの性別は'}, {'role': 'assistant', 'content': '女'}]}
{'messages': [{'role': 'system', 'content': 'You are a helpful assistant. You reply based on each of the facts provided in Japanese.'}, {'role': 'user', 'content': 'ずんだもんの種族は'}, {'role': 'assistant', 'content': 'ずんだの妖精'}]}
{'messages': [{'role': 'system', 'content': 'You are a helpful assistant. You reply based on each of the facts provided in Japanese.'}, {'role': 'user', 'content': 'ずんだもんの特技は'}, {'role': 'assistant', 'content': 'ずんだアローへの変身'}]}
{'messages': [{'role': 'system', 'content': 'You are a helpful assistant. You r

In [97]:
# Hold out the rows labelled 54 through 89 (both endpoints included by `.loc`)
# as the validation set, converted to the same chat format as the training rows.
validation_df = recipe_df.loc[54:89]
validation_data = validation_df.apply(prepare_example_conversation, axis=1).tolist()

In [98]:
def write_jsonl(data_list: list, filename: str) -> None:
    """Write a list of JSON-serializable objects to a JSON Lines file.

    Each element of ``data_list`` is serialized onto its own line. The file is
    opened in write mode, so any existing content at ``filename`` is replaced.

    The file is always encoded as UTF-8 (instead of the platform default) and
    non-ASCII text such as Japanese is written as-is rather than as ``\\uXXXX``
    escapes, which keeps the file human-readable. Parsing the output with a
    JSON reader yields the same objects either way.

    Args:
        data_list: Objects to serialize, one per output line.
        filename: Path of the file to create or overwrite.

    Returns:
        None.
    """
    with open(filename, "w", encoding="utf-8") as out:
        for ddict in data_list:
            out.write(json.dumps(ddict, ensure_ascii=False) + "\n")

In [99]:
# Write the training examples to a local JSONL file (one JSON object per line).
training_file_name = "tmp_zundamon_finetune_training.jsonl"
write_jsonl(training_data, training_file_name)

# Write the validation examples to their own JSONL file in the same format.
validation_file_name = "tmp_zundamon_finetune_validation.jsonl"
write_jsonl(validation_data, validation_file_name)

In [100]:
# Upload the training JSONL file; it is opened in binary mode and passed to
# the Files API with purpose "fine-tune".
with open(training_file_name, "rb") as training_fd:
    training_response = client.files.create(
        file=training_fd, purpose="fine-tune"
    )

# Keep the returned file ID; the fine-tuning job is created from it.
training_file_id = training_response.id

# Upload the validation JSONL file the same way.
with open(validation_file_name, "rb") as validation_fd:
    validation_response = client.files.create(
        file=validation_fd, purpose="fine-tune"
    )
validation_file_id = validation_response.id

# Print both IDs so they are recorded in the notebook output.
print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-24Msg4YEaykEXc6a5GcVaxIa
Validation file ID: file-ihrdmfh88s2M57TMWn5WiaEl


In [101]:
# Create the fine-tuning job from the two uploaded files. The suffix shows up
# in the resulting model name (see the model ID printed by the retrieval cell).
response = client.fine_tuning.jobs.create(
    training_file=training_file_id,
    validation_file=validation_file_id,
    model="gpt-3.5-turbo",
    suffix="zundamon-ner",
)

# Remember the job ID; the next cell uses it to look the job up again.
job_id = response.id

# Report the job ID and its status at creation time.
print("Job ID:", response.id)
print("Status:", response.status)

Job ID: ftjob-9aT8eju4o75p3jwJ2Ao4ow6i
Status: validating_files


In [102]:
# Look the job up again and read the ID of the model it produced.
response = client.fine_tuning.jobs.retrieve(job_id)
fine_tuned_model_id = response.fine_tuned_model

# The model ID stays None until the job has produced a model. Include the job
# status in the error so a job that is still in progress can be told apart
# from one that ended without producing a model.
if fine_tuned_model_id is None:
    raise RuntimeError(
        "Fine-tuned model ID not found. Your job has likely not been completed yet. "
        f"(current job status: {response.status})"
    )

print("Fine-tuned model ID:", fine_tuned_model_id)

Fine-tuned model ID: ft:gpt-3.5-turbo-0125:personal:zundamon-ner:9SNdZ8Dq


In [21]:
# Pick one dataset row; its question is kept in `user_message` as a ready-made
# prompt, although the request below sends the hand-written `ques` instead.
test_df = recipe_df.loc[:]
test_row = test_df.iloc[5]
user_message = test_row["Question"]

# Hand-written question sent to the fine-tuned model.
ques = "ずんだもんは誰がデザイン"

# Same system prompt as the training examples, followed by the user question.
test_messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": ques},
]
pprint(test_messages)

[{'content': 'You are a helpful assistant. You reply based on each of the '
             'facts provided in Japanese.',
  'role': 'system'},
 {'content': 'ずんだもんは誰がデザイン', 'role': 'user'}]


In [22]:
# Hard-coded model ID (the same value printed by the job-retrieval cell); it
# replaces whatever that cell assigned to `fine_tuned_model_id`.
fine_tuned_model_id = "ft:gpt-3.5-turbo-0125:personal:zundamon-ner:9SNdZ8Dq"

# Send the test conversation to the fine-tuned model and print the raw choices.
response = client.chat.completions.create(
    model=fine_tuned_model_id,
    messages=test_messages,
    top_p=0,
    max_tokens=500,
)
print(response.choices)

[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='江戸村ににこる', role='assistant', function_call=None, tool_calls=None))]
