In [29]:
import os, json
import dotenv
from io import BytesIO

from openai import OpenAI
from PIL import Image
import pandas as pd
import numpy as np
import requests
from dotenv import load_dotenv
from tqdm import tqdm

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# Shared OpenAI client used by the cells below.
# NOTE(review): presumably picks up the API key from the environment populated above — confirm.
client = OpenAI()


In [2]:
# Load data
# Each non-empty line of the JSONL file is one JSON-encoded product record.
# The encoding is passed explicitly so the read does not depend on the
# platform's default locale, and blank lines (e.g. a trailing newline at the
# end of the file) are skipped instead of crashing json.loads.
with open("../raw_data/meta_amazon_fashion.jsonl", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

df_amazon_fashion = pd.DataFrame(data)


In [4]:
# Handle missing values or values with only space
def _blank_to_nan(value):
    """Return NaN for an empty list or a whitespace-only string; otherwise return ``value`` unchanged."""
    if isinstance(value, list) and len(value) == 0:
        return np.nan
    if isinstance(value, str) and value.strip() == "":
        return np.nan
    return value


# DataFrame.applymap is deprecated in recent pandas releases (it emits a
# FutureWarning). Mapping each column with Series.map applies the same
# element-wise function and works on both old and new pandas versions.
df_amazon_fashion = df_amazon_fashion.apply(
    lambda column: column.map(_blank_to_nan)
)


  df_amazon_fashion = df_amazon_fashion.applymap(


# Purpose of this notebook

- This notebook tests using OpenAI vision models to extract a title and other details from the images in the Amazon product listings.
- Some of the products in the dataset have a missing title, so generating titles would help with semantic search.
- This method is evaluated based on the cost of processing each image.

# Estimation of cost in processing images of each resolution using OpenAI model

In [54]:
def get_image_patches_count(
    img_url: str, patch_size: int = 32, timeout: float = 10.0
) -> int:
    """Download an image and count the square patches needed to tile it.

    Args:
        img_url: URL of the image to download.
        patch_size: Side length, in pixels, of one square patch (default 32).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ceil(width / patch_size) * ceil(height / patch_size).

    Raises:
        ValueError: If ``patch_size`` is not a positive integer.
        requests.HTTPError: If the server answers with an error status.
    """
    if patch_size <= 0:
        raise ValueError("patch_size must be a positive integer")

    # Without a timeout a stalled connection would hang the whole loop forever.
    response = requests.get(img_url, timeout=timeout)
    # Fail on 4xx/5xx instead of trying to decode an error page as an image.
    response.raise_for_status()

    # Only the header is needed for the size; close the image right after.
    with Image.open(BytesIO(response.content)) as image:
        width, height = image.size

    # Ceiling division: a partially covered patch still counts as a full one.
    width_patches = (width + patch_size - 1) // patch_size
    height_patches = (height + patch_size - 1) // patch_size

    return width_patches * height_patches

In [62]:
# Number of leading rows sampled for the estimate. Every sampled row triggers
# one HTTP download per available image resolution, so keep this small.
N_SAMPLE_ROWS = 500

sample_images = df_amazon_fashion["images"].iloc[:N_SAMPLE_ROWS]
pixel_results = {}

for idx, images in tqdm(sample_images.items(), total=len(sample_images)):
    # Empty image lists were replaced by NaN during cleaning, so skip anything
    # that is not a non-empty list before indexing into it.
    if not isinstance(images, list) or not images:
        continue
    first_image = images[0]

    for key, image in first_image.items():
        # "variant" holds a label (e.g. "MAIN"), not an image URL.
        if key == "variant":
            continue

        # Some resolutions have no URL for a given product; skip those.
        if image:
            pixel_results.setdefault(idx, {})[key] = get_image_patches_count(image)


100%|██████████| 500/500 [03:49<00:00,  2.18it/s]  


In [67]:
def _mean_patches(results: dict, resolution: str):
    """Average patch count for ``resolution`` over the rows that have that image.

    Rows without the requested resolution are ignored; returns NaN when no row
    has it, rather than raising or emitting an empty-mean warning.
    """
    counts = [row[resolution] for row in results.values() if resolution in row]
    return np.mean(counts) if counts else np.nan


# All three resolutions are handled the same way: a product may lack any of them.
thumb_image_avg_patches = _mean_patches(pixel_results, "thumb")
large_image_avg_patches = _mean_patches(pixel_results, "large")
hi_res_image_avg_patches = _mean_patches(pixel_results, "hi_res")

print(f"Thumb image avg patches: {thumb_image_avg_patches}")
print(f"Large image avg patches: {large_image_avg_patches}")
print(f"Hi_res image avg patches: {hi_res_image_avg_patches}")


Thumb image avg patches: 4.0
Large image avg patches: 158.842
Hi_res image avg patches: 1214.7885835095137


### Observation

- Some of the hi_res images are missing.
- The average patch counts of the thumb, large, and hi_res images are all under the maximum cap of the OpenAI vision model.
- For hi_res images, the average token count is 1966.68 for gpt-4.1-mini and 2986.44 for gpt-4.1-nano.
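- Processing one hi_res image costs about $0.0008 with gpt-4.1-mini and about $0.0003 with gpt-4.1-nano (see the calculation below). Running over all the images was estimated at $578 for gpt-4.1-mini and $165 for gpt-4.1-nano; note that these totals appear to have been computed from the truncated per-image figures 0.0007 and 0.0002, so they understate the actual cost.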
- To save cost for the rest of the project, the best decision is to use it only on items with a missing title.

In [70]:
# Prices below are expressed in USD per 1,000,000 tokens, which is the unit
# the arithmetic uses (tokens * price / 1_000_000).
# NOTE(review): presumably the models' input-token list prices at the time of
# writing — confirm against the current pricing page before reuse.
TOKENS_PER_PRICE_UNIT = 1_000_000

# (model name, average tokens for one hi_res image, USD per 1M tokens)
HI_RES_COST_INPUTS = [
    ("gpt-4.1-mini", 1966.68, 0.4),
    ("gpt-4.1-nano", 2986.44, 0.1),
]

for model_name, hi_res_image_avg_tokens, usd_per_million_tokens in HI_RES_COST_INPUTS:
    cost = hi_res_image_avg_tokens * usd_per_million_tokens / TOKENS_PER_PRICE_UNIT
    print(f"Cost for hi_res image using {model_name}: {cost}")

Cost for hi_res image using gpt-4.1-mini: 0.000786672
Cost for hi_res image using gpt-4.1-nano: 0.000298644


In [73]:
# Number of products whose title is missing (NaN).
int(df_amazon_fashion["title"].isna().sum())

58

# Feature extraction from product image using OpenAI-vision

In [81]:
def image_feature_extraction(
    img_url: str,
    system_prompt: str,
    prompt: str,
    model: str = "gpt-4.1-mini",
) -> str:
    """Send one product image plus instructions to an OpenAI model and return its text reply.

    Args:
        img_url: URL of the image passed to the model as an ``input_image`` part.
        system_prompt: Content of the system message.
        prompt: Text instruction sent in the user message alongside the image.
        model: Name of the model to call. Defaults to "gpt-4.1-mini".

    Returns:
        The ``output_text`` of the response.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": prompt},
            {"type": "input_image", "image_url": img_url},
        ],
    }

    response = client.responses.create(
        model=model,
        input=[system_message, user_message],
    )

    return response.output_text

In [84]:
# First image entry of the fourth product (position 3) that has no title.
df_amazon_fashion.loc[df_amazon_fashion["title"].isna(), "images"].iat[3][0]

{'thumb': 'https://m.media-amazon.com/images/I/41yu6BWYFwL._AC_SR38,50_.jpg',
 'large': 'https://m.media-amazon.com/images/I/41yu6BWYFwL._AC_.jpg',
 'variant': 'MAIN',
 'hi_res': 'https://m.media-amazon.com/images/I/713-I6hOwML._AC_UL1500_.jpg'}

In [98]:
# System message: sets the model's role for every extraction request.
system_prompt = """
                You are a helpful assistant that analyzes fashion product images
                and generates detailed and accurate titles suitable for e-commerce listings.
                """

# User message. The example output must itself be a valid JSON object (no
# `or` alternatives, no trailing comma) because the model is asked to answer
# in exactly that format; allowed values are spelled out under "Output format".
# Keep the example as the last section so it can be parsed on its own.
prompt = """Please generate a detailed product title for this fashion item.

            fashion_categories = {
            "Clothing": [
                "Activewear",
                "Dresses",
                "Jeans",
                "Jackets & Coats",
                "Loungewear & Sleepwear",
                "Pants & Trousers",
                "Shirts",
                "Shirts & Blouses",
                "Shorts",
                "Suits & Blazers",
                "Sweaters & Hoodies",
                "Swimwear",
                "Tops & Tees",
                "T-shirts",
                "Underwear & Lingerie",
                "Clothing",
                "Not Clothing"
            ],

            "Shoes": [
                "Athletic Shoes",
                "Boots",
                "Dress Shoes",
                "Flats",
                "Heels",
                "Loafers & Slip-ons",
                "Sandals",
                "Sneakers",
                "Shoes",
                "Not Shoes"
            ],

            "Accessories": [
                "Bags & Purses",
                "Belts",
                "Eyewear (Sunglasses, Optical Frames)",
                "Gloves & Mittens",
                "Hats & Caps",
                "Jewelry",
                "Scarves & Wraps",
                "Wallets",
                "Watches",
                "Accessories",
                "Not Accessories"
            ],

            "Sportswear": [
                "Compression Clothing",
                "Gym Wear",
                "Outdoor Gear (Hiking, Running)",
                "Yoga Apparel",
                "Sportswear",
                "Not Sportswear"
            ],

            "Uniforms & Workwear": [
                "Chef Wear",
                "Lab Coats",
                "Scrubs",
                "Security/Industrial Uniforms",
                "Uniforms & Workwear",
                "Not Uniforms & Workwear"
            ]
            }

            ## Output format
            - please return in JSON object format (do not include ```)
            - "Title" must be a detailed, descriptive product title, not a category name
            - each "<group>_category" value must be taken from the matching list in fashion_categories
            - "Occasion" must be exactly one of: "Beach", "Office", "Party", "Casual", "Formal", "Sport", "Other"


            ## Example output
            {
                "Image_available": "Yes",
                "Title": "Teen Girls' Red Short-Sleeve Crew Neck Casual T-Shirt",
                "Description": "This is a description of the product",
                "Age Range (Description)": "teenager",
                "Brand": "Brand name",
                "Color": "Red",
                "Style": "Casual",
                "Pattern": "Solid",
                "Theme": "Spring",
                "Occasion": "Casual",
                "Clothing_category": "Tops & Tees",
                "Shoes_category": "Not Shoes",
                "Accessories_category": "Not Accessories",
                "Sportswear_category": "Not Sportswear",
                "Uniforms & Workwear_category": "Not Uniforms & Workwear"
            }


"""

# Run the extraction once on a single product image as a smoke test.
image_url = "https://m.media-amazon.com/images/I/41yu6BWYFwL._AC_.jpg"

response = image_feature_extraction(
    img_url=image_url,
    system_prompt=system_prompt,
    prompt=prompt,
)

# The helper returns the model's reply as plain text; show it as-is.
print(response)

{
  "Image_available": "Yes",
  "Title": "Toddler Girls' Camouflage Three-Piece Outfit with 'Daddy's Girl' Printed T-Shirt, Pants, and Matching Headband",
  "Description": "This adorable three-piece toddler outfit includes an olive green short-sleeve T-shirt with 'Daddy's Girl' printed in white, camouflage elastic waist pants, and a matching camouflage knotted headband. Perfect for casual wear and playful days.",
  "Age Range (Description)": "toddler",
  "Sex": "Women's",
  "Brand": "Not Specified",
  "Color": "Olive Green, Camouflage",
  "Style": "Casual",
  "Pattern": "Camouflage, Printed",
  "Theme": "Casual Playtime",
  "Clothing_category": "Tops & Tees",
  "Shoes_category": "Not Shoes",
  "Accessories_category": "Accessories",
  "Sportswear_category": "Not Sportswear",
  "Uniforms & Workwear_category": "Not Uniforms & Workwear",
  "Adult_category": "No",
  "Violience": "No",
  "Harmful": "No"
}
