# 2.1 OpenAI API

## 2.1.1 テキスト生成の基礎

In [None]:
# テキスト生成の基本的な流れ

from openai import OpenAI
from dotenv import load_dotenv

# Load settings from ../.env before constructing the client
# (presumably this is where the API key lives -- confirm).
load_dotenv("../.env")
client = OpenAI()

# A single user turn is sent to the chat completions endpoint.
greeting = {"role": "user", "content": "こんにちは"}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[greeting],
    temperature=0.0,
)

# Print the text of the first returned choice.
first_choice = response.choices[0]
print(first_choice.message.content)

In [None]:
# A simple conversational AI

model = "gpt-4o"
n = 10  # maximum number of conversation turns
history = []
turn = 0
while turn < n:
    user_input = input("ユーザ入力: ")
    # Typing "exit" ends the conversation before the turn limit is reached.
    if user_input == "exit":
        break
    print(f"ユーザ: {user_input}")

    # The whole history, including the new user message, is sent every time.
    user_message = {"role": "user", "content": user_input}
    history.append(user_message)
    response = client.chat.completions.create(model=model, messages=history)

    reply = response.choices[0].message
    content = reply.content
    print(f"AI: {content}")
    # Record the reply so that it is part of the next request.
    history.append({"role": "assistant", "content": content})
    turn += 1

## 2.1.2 テキスト生成の応用

### Stream Generation

In [None]:
# Display the reply piece by piece as it is generated.

history = []
n = 10  # maximum number of conversation turns
model = "gpt-4o"
for _ in range(n):
    user_input = input("ユーザ入力: ")
    # Typing "exit" ends the conversation before the turn limit is reached.
    if user_input == "exit":
        break
    print(f"ユーザ: {user_input}")
    history.append({"role": "user", "content": user_input})
    # stream=True makes create() return an iterable of chunks.
    stream = client.chat.completions.create(model=model, messages=history, stream=True)
    print("AI: ", end="")
    # Text fragments of the reply; joined once the stream is exhausted.
    pieces = []
    for chunk in stream:
        # A chunk without any choices has nothing to display.
        if not chunk.choices:
            continue
        # Streamed choices carry a ChoiceDelta in .delta, not a .message.
        content = chunk.choices[0].delta.content
        # content is None on chunks that carry no text (for example the chunk
        # that reports finish_reason, whatever its value), so only real text
        # is printed and collected. The stream is read to its end rather than
        # breaking on one particular finish_reason.
        if content:
            # flush so each fragment shows up immediately despite end="".
            print(content, end="", flush=True)
            pieces.append(content)
    print()
    ai_content = "".join(pieces)
    history.append({"role": "assistant", "content": ai_content})

### Function Calling

In [None]:
# Using a tool that computes the greatest common divisor

# Hand-written function definition for the "gcd" tool. The arguments are
# declared as JSON Schema "integer" (not "number") because the values are
# later passed to math.gcd, which only accepts integers.
gcd_function = {
    "name": "gcd",
    "description": "最大公約数を求める",
    "parameters": {
        "type": "object",
        "properties": {
            "num1": {"type": "integer", "description": "整数1"},
            "num2": {"type": "integer", "description": "整数2"},
        },
        "required": ["num1", "num2"],
    },
}
# The API expects each tool wrapped as {"type": "function", "function": ...}.
tools = [{"type": "function", "function": gcd_function}]

# One user question; the tool definitions are passed alongside it via tools=.
question = "50141 と 53599 の最大公約数を求めてください。"
messages = [{"role": "user", "content": question}]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
)

In [None]:
# 関数情報を抽出

import json

# tool_calls is empty/None when the model replied with plain text instead of
# requesting a tool, so fail with a clear message rather than an opaque
# "NoneType is not subscriptable" error.
tool_calls = response.choices[0].message.tool_calls
if not tool_calls:
    raise ValueError("The model did not request a tool call.")

# Only the first requested call is inspected here.
function_info = tool_calls[0].function
name = function_info.name
# The arguments arrive as a JSON-encoded string.
args = json.loads(function_info.arguments)

In [None]:
# 最大公約数の計算

import math

# The arguments come from model-generated JSON and may decode as floats
# (e.g. 50141.0) when the schema types them as "number"; math.gcd only
# accepts integers, so coerce them first.
print(math.gcd(int(args["num1"]), int(args["num2"])))

In [None]:
# Pydantic を用いた関数の定義

from pydantic import BaseModel, Field


# Argument model for the "gcd" tool: two integer operands.
# GCD.model_json_schema() supplies the tool's "parameters", and
# GCD.model_validate_json() parses the arguments returned by the model.
class GCD(BaseModel):
    num1: int = Field(description="整数1")
    num2: int = Field(description="整数2")


# Tool definition whose parameter schema is generated from the GCD model
# rather than written out by hand.
gcd_parameters = GCD.model_json_schema()
gcd_function = {
    "name": "gcd",
    "description": "最大公約数を求める",
    "parameters": gcd_parameters,
}

In [None]:
# Wrap the function definition in the {"type": "function", ...} envelope.
gcd_tool = {"type": "function", "function": gcd_function}
tools = [gcd_tool]

question = "50141 と 53599 の最大公約数を求めてください。"
messages = [{"role": "user", "content": question}]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
)

In [None]:
# Parse the tool-call arguments with Pydantic

# tool_calls is empty/None when the model replied with plain text instead of
# requesting a tool, so fail with a clear message rather than an opaque
# "NoneType is not subscriptable" error.
tool_calls = response.choices[0].message.tool_calls
if not tool_calls:
    raise ValueError("The model did not request a tool call.")

# The arguments are a JSON string; validating them against GCD yields typed
# num1 / num2 fields.
parsed_result = GCD.model_validate_json(tool_calls[0].function.arguments)
print(parsed_result)

In [None]:
# ツール利用全体の流れ


# Argument model for the "lcm" tool: two integer operands.
# LCM.model_json_schema() supplies the tool's "parameters", and
# LCM.model_validate_json() parses the arguments returned by the model.
class LCM(BaseModel):
    num1: int = Field(description="整数1")
    num2: int = Field(description="整数2")


# Tool definition for "lcm", with its parameter schema generated from LCM.
lcm_parameters = LCM.model_json_schema()
lcm_function = {
    "name": "lcm",
    "description": "最小公倍数を求める",
    "parameters": lcm_parameters,
}

# Offer both tools, gcd first and lcm second.
tools = [
    {"type": "function", "function": function}
    for function in (gcd_function, lcm_function)
]

# A question that asks for both the gcd and the lcm of the same two numbers.
question = "50141 と 53599 の最大公約数と最小公倍数を求めてください。"
messages = [{"role": "user", "content": question}]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
)
# Act on the first choice according to why generation finished.
choice = response.choices[0]
if choice.finish_reason == "tool_calls":
    # Run every requested tool locally and print its result.
    # "or []" guards against tool_calls being None.
    for tool in choice.message.tool_calls or []:
        tool_name = tool.function.name
        if tool_name == "gcd":
            gcd_args = GCD.model_validate_json(tool.function.arguments)
            print(f"最大公約数: {math.gcd(gcd_args.num1, gcd_args.num2)}")
        elif tool_name == "lcm":
            lcm_args = LCM.model_validate_json(tool.function.arguments)
            print(f"最小公倍数: {math.lcm(lcm_args.num1, lcm_args.num2)}")
        else:
            # Report an unexpected tool name instead of silently ignoring it.
            print(f"未対応のツール: {tool_name}")
elif choice.finish_reason == "stop":
    # The model answered directly without calling a tool.
    print("AI: ", choice.message.content)
else:
    # Any other finish_reason used to fall through without output; make it visible.
    print(f"想定外の finish_reason: {choice.finish_reason}")

### response_format

In [None]:
# response_format の利用例


# Expected shape of the model's JSON answer: one translated sentence per
# language. Its JSON schema is embedded in the prompt, and the reply is
# validated against it with Translations.model_validate_json().
class Translations(BaseModel):
    english: str = Field(description="英語の文章")
    french: str = Field(description="フランス語の文章")
    chinese: str = Field(description="中国語の文章")


import json

# Serialize the schema as real JSON. Interpolating the dict directly would
# embed its Python repr (single-quoted keys), which is not JSON even though
# the prompt describes it as JSON Schema. ensure_ascii=False keeps the
# Japanese field descriptions readable instead of \u-escaped.
schema_json = json.dumps(Translations.model_json_schema(), ensure_ascii=False, indent=2)

prompt = f"""\
以下に示す文章を英語・フランス語・中国語に翻訳してください。
ただし、アウトプットは後述するフォーマットの JSON 形式で出力してください。

# 文章
吾輩は猫である。名前はまだない。

# 出力フォーマット
以下に JSON Schema 形式のフォーマットを示します。このフォーマットに従うオブジェクトの形で出力してください。
{schema_json}
"""

# Request the translations with response_format set to "json_object".
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": prompt}],
    response_format={"type": "json_object"},
    temperature=0.0,
)

# Validate the returned JSON text against the Translations model.
raw_json = response.choices[0].message.content
translations = Translations.model_validate_json(raw_json)

# Print one labelled line per language.
for label, sentence in (
    ("英語:", translations.english),
    ("フランス語:", translations.french),
    ("中国語:", translations.chinese),
):
    print(label, sentence)

## 2.1.3 画像を入力する

In [None]:
import base64
from pathlib import Path
from typing import Any

from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables with python-dotenv's default lookup, then
# create the client used by the image cells below.
# NOTE(review): the first cell of this file calls load_dotenv("../.env")
# instead -- confirm which location is intended here.
load_dotenv()
client = OpenAI()


def image2content(image_path: Path, *, detail: str = "low") -> dict[str, Any]:
    """Build an ``image_url`` content part that embeds a local image as a data URL.

    Args:
        image_path: Path of the image file to embed.
        detail: Value stored in the ``detail`` field of the image part.
            Defaults to ``"low"``, the value that was previously hard-coded.

    Returns:
        A dict of the form
        ``{"type": "image_url", "image_url": {"url": <data URL>, "detail": detail}}``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes

    # Derive the MIME type from the file suffix so that non-PNG images are
    # labelled correctly; fall back to the previously hard-coded "image/png"
    # when the suffix is unknown or does not denote an image.
    mime_type, _ = mimetypes.guess_type(image_path.name)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    # Base64-encode the raw bytes for embedding in the data URL.
    image_base64 = base64.b64encode(image_path.read_bytes()).decode("utf-8")

    return {
        "type": "image_url",
        "image_url": {
            "url": f"data:{mime_type};base64,{image_base64}",
            "detail": detail,
        },
    }

In [None]:
# Ask a question about a single local image: one text part followed by one
# image part in the same user message.
image_path = Path("./sample_image1.png")
prompt = "これは何の画像ですか?"
text_part = {"type": "text", "text": prompt}
contents = [text_part, image2content(image_path)]

user_message = {"role": "user", "content": contents}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[user_message],
    temperature=0.0,
)

first_choice = response.choices[0]
print(first_choice.message.content)

In [None]:
# Compare two images: the text part comes first, then one image part per
# file, in the order image_path, image_path2.
image_path2 = Path("./sample_image2.png")

prompt = "2枚の画像の違いを教えてください。"
contents = [{"type": "text", "text": prompt}]
contents += [image2content(path) for path in (image_path, image_path2)]

user_message = {"role": "user", "content": contents}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[user_message],
    temperature=0.0,
)

first_choice = response.choices[0]
print(first_choice.message.content)