In [11]:
from openai import OpenAI
import json
import pandas as pd
from tqdm import tqdm
from IPython.display import HTML
import random

client = OpenAI()

# ----------------------------------------
# JSONロード関数
# ----------------------------------------

def load_items(data_path):
    """Load the list of item records from a JSON file.

    The file may contain either a top-level JSON array, or a JSON object
    that holds the array under an ``"items"`` or ``"results"`` key
    (``"items"`` takes precedence when both are present).

    Args:
        data_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The item collection found in the file.

    Raises:
        ValueError: If the top-level JSON value is neither an array nor an
            object containing ``"items"`` or ``"results"``.
    """
    with open(data_path, encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, list):
        return data

    # Only look up keys on real JSON objects: a top-level scalar would make
    # the ``in`` test raise TypeError (numbers) or silently perform a
    # substring search (strings) instead of reporting the actual problem.
    if isinstance(data, dict):
        for key in ("items", "results"):
            if key in data:
                return data[key]

    raise ValueError("items または results 配列が見つかりません。")

def get_image_url(item):
    """Return the preferred image URL for an Amazon-format item, or None.

    Images are scanned in order; for each one the ``hi_res`` URL is preferred,
    then ``large``, then ``thumb``. The first non-empty URL found wins.
    """
    images = item.get("images")
    if not images:
        return None

    # Lazily compute the best URL of each image entry, highest resolution first.
    best_per_image = (
        entry.get("hi_res") or entry.get("large") or entry.get("thumb")
        for entry in images
    )
    return next((candidate for candidate in best_per_image if candidate), None)

# ----------------------------------------
# キャプション生成
# ----------------------------------------

def generate_image_caption(
    image_path: str,
    title: str = "",
    store: str = "",
    system_prompt: str = None,
    caption_prompt: str = None,
    model: str = None,
    max_tokens: int = 120,
    temperature: float = 0.4,
):
    """Ask the chat-completions API for a caption of a single product image.

    Args:
        image_path: URL of the image, sent to the API as an ``image_url`` part.
            A falsy value short-circuits and returns None without an API call.
        title: Product title substituted into ``{title}`` of ``caption_prompt``.
        store: Store name substituted into ``{store}`` of ``caption_prompt``.
        system_prompt: System message. Defaults to the module-level
            ``SYSTEM_PROMPT`` looked up at call time.
        caption_prompt: User prompt template; ``str.format`` is applied to it
            with ``title`` and ``store``. Defaults to the module-level
            ``CAPTION_PROMPT`` looked up at call time.
        model: Model name. Defaults to the module-level ``MODEL`` looked up
            at call time.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The stripped caption text, or None when there is no image, the
        request fails, or the model returns no text.

    Raises:
        NameError: If a default is needed but the corresponding module-level
            constant has not been defined.
    """
    if not image_path:
        return None

    # Resolve defaults lazily. Referencing the constants in the signature
    # would evaluate them when the ``def`` statement runs, which fails if they
    # are defined in a later cell and freezes stale values if they are edited.
    if system_prompt is None:
        system_prompt = SYSTEM_PROMPT
    if caption_prompt is None:
        caption_prompt = CAPTION_PROMPT
    if model is None:
        model = MODEL

    try:
        user_prompt = caption_prompt.format(title=title or "", store=store or "")
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt.strip()},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt.strip()},
                        {"type": "image_url", "image_url": {"url": image_path}},
                    ],
                },
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        content = response.choices[0].message.content
        # The API may return no text content; report that as a missing
        # caption rather than tripping over ``None.strip()``.
        return content.strip() if content else None
    except Exception as e:
        # Deliberate best-effort boundary: one failing item must not abort a
        # whole batch, so log the failure and signal it with None.
        print(f"⚠️ Error for {image_path}: {e}")
        return None

# ----------------------------------------
# N件サンプルしてキャプション生成＋可視化
# ----------------------------------------
from IPython.display import HTML
from tqdm import tqdm
import pandas as pd
import random

def preview_caption_comparison(
    data_path: str,
    n: int = 5,
    random_sample: bool = True,
    system_prompt: str = None,
    prompt_image_only: str = None,
    prompt_with_text: str = None,
    model: str = "gpt-4o-mini",
    return_df: bool = True
):
    """Generate two captions per sampled item and show them side by side.

    For every sampled item two captions are requested:
      1. from the image alone (``prompt_image_only``), and
      2. from the image plus title / store / features (``prompt_with_text``).

    The results are rendered as an HTML card grid in the notebook and
    optionally returned as a DataFrame.

    Args:
        data_path: Path to the JSON file read by ``load_items``.
        n: Number of items to process (capped at the number available).
        random_sample: If True, sample items at random; otherwise take the
            first ``n`` items.
        system_prompt: System prompt passed to the model (required).
        prompt_image_only: Prompt template for the image-only caption
            (required).
        prompt_with_text: Prompt template for the image+text caption; it is
            formatted with ``title``, ``store`` and ``features`` (required).
        model: Model name forwarded to ``generate_image_caption``.
        return_df: If True, return the results as a DataFrame.

    Returns:
        A DataFrame with columns ``title``, ``store``, ``image_url``,
        ``features``, ``caption_image_only`` and ``caption_with_text`` when
        ``return_df`` is True, otherwise None.

    Raises:
        ValueError: If any of the three prompts is missing or ``n`` is
            negative.
    """
    # ``display`` is only injected as a builtin inside IPython kernels;
    # importing it keeps the function usable wherever IPython is installed.
    from IPython.display import display

    if system_prompt is None:
        raise ValueError("system_prompt を指定してください。")
    if prompt_image_only is None:
        raise ValueError("prompt_image_only を指定してください。")
    if prompt_with_text is None:
        raise ValueError("prompt_with_text を指定してください。")
    if n < 0:
        # A negative n would otherwise silently slice from the end of the list.
        raise ValueError("n must be non-negative.")

    items = load_items(data_path)
    if random_sample:
        items = random.sample(items, min(n, len(items)))
    else:
        items = items[:n]

    rows = []
    # Label the progress bar with the real sample size, which can be below n.
    for item in tqdm(items, desc=f"Generating and comparing {len(items)} captions"):
        url = get_image_url(item)
        title = item.get("title")
        store = item.get("store")
        features = item.get("features") or []
        if isinstance(features, str):
            # Joining a bare string would split it into single characters.
            features = [features]

        # 1) Image only.
        cap_img_only = generate_image_caption(
            image_path=url,
            title=title,
            store=store,
            system_prompt=system_prompt,
            caption_prompt=prompt_image_only,
            model=model
        )

        # 2) Image + text context.
        features_text = "; ".join(str(f) for f in features) if features else "None"
        prompt_text = prompt_with_text.format(
            title=title or "",
            store=store or "",
            features=features_text
        )
        # generate_image_caption applies str.format to caption_prompt again.
        # Double any braces that came from the item text so that second pass
        # reproduces prompt_text verbatim instead of failing on them.
        prompt_template = prompt_text.replace("{", "{{").replace("}", "}}")
        cap_with_text = generate_image_caption(
            image_path=url,
            title=title,
            store=store,
            system_prompt=system_prompt,
            caption_prompt=prompt_template,
            model=model
        )

        rows.append({
            "title": title,
            "store": store,
            "image_url": url,
            "features": features_text,
            "caption_image_only": cap_img_only,
            "caption_with_text": cap_with_text
        })

    df = pd.DataFrame(rows)

    display(HTML(_render_caption_cards(rows)))
    if return_df:
        return df


def _render_caption_cards(rows):
    """Build the HTML card grid for the caption comparison rows."""
    from html import escape

    def text(value):
        # Item data and model output are untrusted text: escape them so that
        # characters such as & < > " cannot break or inject markup.
        return escape(str(value))

    parts = ["""
    <style>
      body { background:#0f1115; color:#e2e8f0; font-family:sans-serif; }
      .grid { display:grid; grid-template-columns:repeat(auto-fill,minmax(360px,1fr)); gap:12px; }
      .card { background:#171923; border:1px solid #2a2f3a; border-radius:12px; padding:10px; }
      .thumb { width:100%; border-radius:8px; object-fit:cover; aspect-ratio:1/1; margin-bottom:6px; }
      .title { font-weight:700; margin-bottom:6px; font-size:15px; }
      .store { font-size:12px; color:#a0aec0; margin-bottom:6px; }
      .caption-block { background:#1a202c; padding:8px; border-radius:8px; margin-bottom:6px; }
      .caption-block h4 { margin:0 0 4px 0; font-size:13px; color:#93c5fd; }
      .caption-text { font-size:13px; color:#cbd5e1; line-height:1.4; }
    </style>
    <div class="grid">
    """]
    for r in rows:
        # Skip the <img> tag when there is no URL rather than requesting "None".
        img_tag = (
            f'<img src="{text(r["image_url"])}" class="thumb">'
            if r["image_url"] else ""
        )
        parts.append(f"""
        <div class="card">
          {img_tag}
          <div class="title">{text(r['title'])}</div>
          <div class="store">{text(r['store'])}</div>
          <div class="caption-block">
            <h4>① 画像のみ</h4>
            <div class="caption-text">{text(r['caption_image_only'])}</div>
          </div>
          <div class="caption-block">
            <h4>② 画像＋テキスト統合</h4>
            <div class="caption-text">{text(r['caption_with_text'])}</div>
          </div>
        </div>
        """)
    parts.append("</div>")
    return "".join(parts)


In [19]:
# ---- Prompt and run configuration (English-only output, strict brand rule, 3–5 sentences). ----
# These may be defined after the helper functions because they are passed
# explicitly to preview_caption_comparison in the call below.

# System message sent with every caption request.
SYSTEM_PROMPT = """
You are a factual assistant for e-commerce product images.
Always write in English.
Produce concise, objective captions (3–5 sentences).
Describe only what is visible in the image or explicitly provided in the user prompt.
Do not speculate about attributes that are not clearly visible or provided.
For Brand, include it only if it is verifiable from the image (logo/label) or explicitly given in the prompt; otherwise, omit any brand claim.
Avoid marketing language, opinions, and unverifiable claims.
Prefer concrete attributes: category, color, material, silhouette/fit, notable details.
Mention season/occasion/style only if clearly supported by visible cues (coverage, fabric weight, sparkle, etc.).
"""

# User prompt for the image-only caption; it contains no format placeholders.
PROMPT_IMAGE_ONLY = """
Write one concise factual caption (3–5 sentences) for this product image.
Use only what is visually observable: category, color, material, silhouette/fit, notable details.
Do NOT infer season/occasion/style unless clearly indicated by visible cues.
Mention Brand only if it is visible in the image (logo/label); otherwise, do not mention a brand.
Keep a neutral, non-marketing tone.
"""

# User prompt template for the image+text caption; {title}, {store} and
# {features} are filled in per item by preview_caption_comparison.
PROMPT_WITH_TEXT = """
Write one concise factual caption (3–5 sentences) for this product image using both the visual content and the context below.
Do NOT contradict what is visible in the image. If the context mentions attributes that are not visible, include them only if they do not contradict the image.
Prefer concrete attributes: category, color, material, silhouette/fit, notable details.
Mention season/occasion/style only if supported by visible cues or explicitly provided.
For Brand, include it only if it is visible in the image or explicitly present in the context (Title/Store); otherwise, omit any brand claim.
Keep a neutral, non-marketing tone.

Context:
Title: {title}
Store: {store}
Features: {features}
"""

# JSON file handed to preview_caption_comparison as data_path.
SAMPLE_DATA = "../data/sample/sample_1000.json"
# Number of items to sample for the comparison.
N = 2

In [20]:
# キャプション生成＋比較
captions_df = preview_caption_comparison(
    data_path=SAMPLE_DATA,
    n=N,
    system_prompt=SYSTEM_PROMPT,
    prompt_image_only=PROMPT_IMAGE_ONLY,
    prompt_with_text=PROMPT_WITH_TEXT,
)

Generating and comparing 2 captions:   0%|          | 0/2 [00:00<?, ?it/s]

Generating and comparing 2 captions: 100%|██████████| 2/2 [00:14<00:00,  7.19s/it]


In [21]:
# Save the results (example: CSV).
OUTPUT_PATH = "../data/sample/captions_sample.csv"
captions_df.to_csv(OUTPUT_PATH, index=False)

# Reload for reuse, e.g. from another notebook.
import pandas as pd
# Treat only empty fields as missing. With pandas' default NA markers the
# literal text "None" (the placeholder stored for items without features),
# as well as titles such as "NA" or "null", would be read back as NaN.
df = pd.read_csv(OUTPUT_PATH, keep_default_na=False, na_values=[""])

df.head()

Unnamed: 0,title,store,image_url,features,caption_image_only,caption_with_text
0,Little Big Sister Toddler Baby Girl Matching C...,DiDaDo,https://m.media-amazon.com/images/I/71b+Pcqy++...,,This two-piece outfit features a white long-sl...,This outfit set features a long-sleeve white t...
1,Sweat Vest for Women - (XS-10XL) - Weightless ...,Sweat Spark,https://m.media-amazon.com/images/I/714bns3Kr5...,Hand Wash Only,This product image features a women's sweat ve...,This sweat vest for women features a sleek bla...
