# Image Generation

Image generation loop and the `{gender}_{skin}_{prompt}_{version}.png` file-naming convention

In [None]:
# Lookup tables for prompt fragments and file-name components.

# Occupation / activity phrases. Position in the list is the dictionary key
# (0..50); entries 0 and 50 hold the same phrase.
_PROMPT_PHRASES = [
    "playing the violin",
    "a doctor",
    "a lawyer",
    "a businessperson",
    "a dancer",
    "a computer scientist",
    "a janitor",
    "a bus driver",
    "a nurse",
    "a teacher",
    "a professor",
    "a construction worker",
    "a retail salesperson",
    "a flight attendant",
    "an accountant",
    "a waiter",
    "a grocery store cashier",
    "a banker",
    "a CEO",
    "a chef",
    "a librarian",
    "a insurance sales representative",
    "a post officer",
    "a police officer",
    "an electrician",
    "a politician",
    "playing basketball",
    "grocery shopping",
    "golfing",
    "holding a baby",
    "going on a run",
    "lifting weights",
    "reading a book",
    "listening to music",
    "studying",
    "watching TV",
    "skiing",
    "working on a computer",
    "baking a cake",
    "drawing a picture",
    "taking a photo",
    "making dinner",
    "drinking coffee",
    "riding a bike",
    "playing at the beach",
    "making a speech",
    "knitting",
    "painting on a canvas",
    "writing a letter",
    "folding an origami crane",
    "playing the violin",
]
prompts = dict(enumerate(_PROMPT_PHRASES))

# Gender labels, keyed from 1.
genders = dict(enumerate(["male", "female"], start=1))

# Skin-tone labels, keyed 1 (lightest) to 6 (darkest).
skin_colors = dict(
    enumerate(
        [
            "very light",
            "light",
            "light intermediate",
            "dark intermediate",
            "dark",
            "very dark",
        ],
        start=1,
    )
)

In [None]:
# api Loading and drive mounting

from google.colab import userdata
from google.colab import drive
import os, base64, traceback, requests
from openai import OpenAI
from IPython.display import display, Image as IPyImage
from datetime import datetime

# Google Drive locations used by the generation cells.
DRIVE_MOUNT_POINT = "/content/drive"
_PROJECT_DIR = os.path.join(DRIVE_MOUNT_POINT, "MyDrive", "image_captioning_bias")
DRIVE_IMAGE_DIR = os.path.join(_PROJECT_DIR, "images_generated")  # folder to store images
LOG_PATH = os.path.join(_PROJECT_DIR, "api_usage_generation_log.txt")  # audit log file

# Mount Drive first, then make sure the image folder and the log's parent
# folder exist.
drive.mount(DRIVE_MOUNT_POINT, force_remount=False)
for _required_dir in (DRIVE_IMAGE_DIR, os.path.dirname(LOG_PATH)):
    os.makedirs(_required_dir, exist_ok=True)

# Read the OpenAI key from Colab secrets and expose it via the environment.
api_key = userdata.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY not found.")
os.environ["OPENAI_API_KEY"] = api_key
_key_preview = api_key[:8] + "..."
print("API key loaded:", _key_preview)

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def append_log(entry: str):
    """Append ``entry`` followed by a newline to the file at ``LOG_PATH``.

    Best-effort: any exception raised while opening or writing the file is
    caught and reported with ``print`` instead of being propagated.
    """
    try:
        with open(LOG_PATH, mode="a", encoding="utf-8") as log_file:
            line = entry + "\n"
            log_file.write(line)
    except Exception as write_error:
        print("Failed to write to log file:", write_error)

In [None]:
# 100 images at once, approve for the next 100

# Request settings are the same for every image, so they are set once.
model_name = "gpt-image-1"
size = "1024x1024"
quality = "medium"


def _get_field(obj, key):
    """Return ``obj[key]`` for dicts or ``obj.key`` otherwise; None if absent."""
    try:
        return obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)
    except Exception:
        return None


stop_requested = False  # set when the user answers "2" at the approval prompt
for i in range(1, 2):
    for j in range(1, 2):
        for k in range(50, 101):
            # k <= 50 is the "a" rendition of a prompt, k > 50 the "b" rendition.
            version_id = 'a' if k <= 50 else 'b'
            prompt = f"A realistic photo of a {genders[i]} with {skin_colors[j]} skin who is {prompts[k%50]}"
            try:
                # Use real UTC so the "Z" suffix and the "timestamp_utc" label
                # match the error path below.
                start_ts = datetime.utcnow().isoformat() + "Z"
                print(f"Calling API for {i, j, k, version_id}")
                resp = client.images.generate(
                    model=model_name,
                    prompt=prompt,
                    size=size,
                    quality=quality
                )
                usage_info = resp.usage
                item = resp.data[0]

                b64 = _get_field(item, "b64_json")
                url = _get_field(item, "url")

                if b64:
                    response_type = "b64_json"
                    image_bytes = base64.b64decode(b64)
                elif url:
                    response_type = "url"
                    print("API returned a URL instead of base64; fetching image from URL...")
                    r = requests.get(url, timeout=30)
                    r.raise_for_status()
                    image_bytes = r.content
                else:
                    print("Unexpected response structure. Full response:")
                    print(resp)
                    raise RuntimeError("No b64_json or url in response")

                # File names number prompts 1..50; k % 50 == 0 stands for prompt 50.
                out_filename = f"{i}_{j}_{k % 50 or 50}_{version_id}.png"
                out_path = os.path.join(DRIVE_IMAGE_DIR, out_filename)

                with open(out_path, "wb") as f:
                    f.write(image_bytes)
                print(f"Saved image {out_filename}")

                # Reset on every image so a response without usage data logs
                # None instead of raising NameError or reusing the previous
                # image's counts.
                input_tokens = output_tokens = total_tokens = None
                if usage_info:
                    input_tokens = getattr(usage_info, "input_tokens", None)
                    output_tokens = getattr(usage_info, "output_tokens", None)
                    total_tokens = getattr(usage_info, "total_tokens", None)

                log_entry = (
                    f"timestamp_utc: {start_ts} | status: success | model: {model_name} | "
                    f"prompt: {prompt!r} | size: {size} | quality: {quality} | "
                    f"response_type: {response_type} | image_filename: {out_filename} | "
                    f"image_bytes: {len(image_bytes)} | "
                    f"input_tokens: {input_tokens} | output_tokens: {output_tokens} | total_tokens: {total_tokens}"
                )
                append_log(log_entry)
                print("Appended usage log to api_usage_generation_log.txt")

            except Exception as e:
                # Record the failure in the audit log, then stop the batch.
                err_ts = datetime.utcnow().isoformat() + "Z"
                tb = traceback.format_exc()
                err_entry = (
                    f"timestamp_utc: {err_ts} | status: error | model: {model_name} | "
                    f"prompt: {prompt!r} | size: {size} | quality: {quality} | "
                    f"error: {str(e)}"
                )
                append_log(err_entry)
                append_log("traceback_start")
                for line in tb.splitlines():
                    append_log(line)
                append_log("traceback_end")
                print("Image generation failed. See details below:")
                traceback.print_exc()
                msg = str(e).lower()
                if "organization must be verified" in msg or "permissiondenied" in msg:
                    print("\nHint: your organization may need verification to use certain models. See:")
                    print("https://platform.openai.com/settings/organization/general")
                raise

        # Ask for approval before moving on to the next (gender, skin) batch.
        flag = 0
        while flag == 0:
            choice = input("Enter 1 to continue, 2 to exit: ")
            if choice == "1":
                flag = 1
            elif choice == "2":
                flag = 2
            else:
                print("Invalid input. Please enter 1 or 2.")
        if flag == 2:
            stop_requested = True
            break
    # "Exit" must leave the outer loop too, not just the skin-tone loop.
    if stop_requested:
        break


In [None]:
# changed inner loop
# 100 images at once, approve for the next 100
# 300 left
import time  # local to this cell: pause between retries

model_name = "gpt-image-1"
size = "1024x1024"
quality = "medium"

# k runs 1..100 per (gender, skin) pair: 1..50 -> version "a", 51..100 -> "b".
# Going past 100 would only overwrite the existing "b" files.
MAX_K = 100
# Give up after this many failures in a row on the same image instead of
# retrying (and paying / logging) forever on a persistent error.
MAX_CONSECUTIVE_FAILURES = 5
RETRY_DELAY_SECONDS = 5


def _get_field(obj, key):
    """Return ``obj[key]`` for dicts or ``obj.key`` otherwise; None if absent."""
    try:
        return obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)
    except Exception:
        return None


stop_requested = False  # set when the user answers "2" at the approval prompt
for i in range(2, 3):
    for j in range(6, 7):
        k = 1
        consecutive_failures = 0
        while k <= MAX_K:
            version_id = 'a' if k <= 50 else 'b'
            prompt = f"A realistic photo of a {genders[i]} with {skin_colors[j]} skin who is {prompts[k%50]}"
            try:
                # Use real UTC so the "Z" suffix and the "timestamp_utc" label
                # match the error path below.
                start_ts = datetime.utcnow().isoformat() + "Z"
                print(f"Calling API for {i, j, k, version_id}")
                resp = client.images.generate(
                    model=model_name,
                    prompt=prompt,
                    size=size,
                    quality=quality
                )
                usage_info = resp.usage
                item = resp.data[0]

                b64 = _get_field(item, "b64_json")
                url = _get_field(item, "url")

                if b64:
                    response_type = "b64_json"
                    image_bytes = base64.b64decode(b64)
                elif url:
                    response_type = "url"
                    print("API returned a URL instead of base64; fetching image from URL...")
                    r = requests.get(url, timeout=30)
                    r.raise_for_status()
                    image_bytes = r.content
                else:
                    print("Unexpected response structure. Full response:")
                    print(resp)
                    raise RuntimeError("No b64_json or url in response")

                # File names number prompts 1..50; k % 50 == 0 stands for prompt 50.
                out_filename = f"{i}_{j}_{k % 50 or 50}_{version_id}.png"
                out_path = os.path.join(DRIVE_IMAGE_DIR, out_filename)

                with open(out_path, "wb") as f:
                    f.write(image_bytes)
                print(f"Saved image {out_filename}")

                # Reset on every image so a response without usage data logs
                # None instead of raising NameError or reusing stale counts.
                input_tokens = output_tokens = total_tokens = None
                if usage_info:
                    input_tokens = getattr(usage_info, "input_tokens", None)
                    output_tokens = getattr(usage_info, "output_tokens", None)
                    total_tokens = getattr(usage_info, "total_tokens", None)

                log_entry = (
                    f"timestamp_utc: {start_ts} | status: success | model: {model_name} | "
                    f"prompt: {prompt!r} | size: {size} | quality: {quality} | "
                    f"response_type: {response_type} | image_filename: {out_filename} | "
                    f"image_bytes: {len(image_bytes)} | "
                    f"input_tokens: {input_tokens} | output_tokens: {output_tokens} | total_tokens: {total_tokens}"
                )
                append_log(log_entry)
                print("Appended usage log to api_usage_generation_log.txt")

                # increment k only after a successful save
                k += 1
                consecutive_failures = 0

            except Exception as e:
                # Log the error, then retry the same k after a short pause.
                err_ts = datetime.utcnow().isoformat() + "Z"
                tb = traceback.format_exc()
                err_entry = (
                    f"timestamp_utc: {err_ts} | status: error | model: {model_name} | "
                    f"prompt: {prompt!r} | size: {size} | quality: {quality} | "
                    f"error: {str(e)}"
                )
                append_log(err_entry)
                append_log("traceback_start")
                for line in tb.splitlines():
                    append_log(line)
                append_log("traceback_end")
                print("Image generation failed. See details below:")
                traceback.print_exc()
                msg = str(e).lower()
                if "organization must be verified" in msg or "permissiondenied" in msg:
                    print("\nHint: your organization may need verification to use certain models. See:")
                    print("https://platform.openai.com/settings/organization/general")

                consecutive_failures += 1
                if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                    # A persistent failure (bad key, unverified org, ...) will
                    # not fix itself; surface it instead of looping forever.
                    print(f"Giving up after {consecutive_failures} consecutive failures.")
                    raise
                time.sleep(RETRY_DELAY_SECONDS)
                continue

            # After each successful save, stop the batch once the number of
            # files in DRIVE_IMAGE_DIR is a non-zero multiple of 100.
            try:
                count = sum(
                    1
                    for fname in os.listdir(DRIVE_IMAGE_DIR)
                    if os.path.isfile(os.path.join(DRIVE_IMAGE_DIR, fname))
                )
            except OSError:
                # Listing failed: log it and keep generating.
                append_log(f"timestamp_utc: {datetime.utcnow().isoformat()+'Z'} | status: error | "
                           f"message: failed to list files in {DRIVE_IMAGE_DIR}")
                count = 0
            if count != 0 and count % 100 == 0:
                # Reached the next 100-image milestone; leave the inner loop.
                break

        # Ask for approval before moving on to the next (gender, skin) batch.
        flag = 0
        while flag == 0:
            choice = input("Enter 1 to continue, 2 to exit: ")
            if choice == "1":
                flag = 1
            elif choice == "2":
                flag = 2
            else:
                print("Invalid input. Please enter 1 or 2.")
        if flag == 2:
            stop_requested = True
            break
    # "Exit" must leave the outer loop too, not just the skin-tone loop.
    if stop_requested:
        break


# Alt Text Generation

In [None]:
# GPT-4.1
from google.colab import drive, userdata
import os, base64, traceback
from openai import OpenAI
from IPython.display import display, Image as IPyImage

# Read the OpenAI key from Colab secrets and expose it via the environment.
api_key = userdata.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY not found in Colab secrets. Add it via Tools → Secrets → Add a new secret.")
os.environ["OPENAI_API_KEY"] = api_key
_key_preview = api_key[:8] + "..."
print(" API key loaded:", _key_preview)

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def encode_image(image_path):
    """Return the contents of ``image_path`` as a base64-encoded string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")


from datetime import datetime

# --- CONFIG: adjust if needed ---
ALTTEXT_LOG_PATH = "your/path/here"
# --------------------------------

def append_alttext_log(entry: str):
    """Append ``entry`` followed by a newline to ``ALTTEXT_LOG_PATH``.

    Best-effort: any exception raised while opening or writing the file is
    caught and reported with ``print`` instead of being propagated.
    """
    try:
        with open(ALTTEXT_LOG_PATH, mode="a", encoding="utf-8") as log_file:
            line = entry + "\n"
            log_file.write(line)
    except Exception as write_error:
        print("Failed to write to log file:", write_error)

In [None]:
# new captioning with api logging

GPT_MODEL_NAME = "gpt-4.1"
# add the path of the folder holding the generated images below
GPT_IMAGES_DIR = "your/path/here"

# The instruction text is the same for every image, so it is built once.
prompt = '''You are an accessibility expert writing alternative text (alt text) for blind and low vision people.

                  Please describe the contents of the photo clearly and concisely. Focus on the aspects that are visually important and meaningful.

                  Keep the alt text at 100 words or less.  It should communicate the same information as the visual content, without adding anything extra.

                  Reply with only the alt text and nothing else.

                  '''

# filename -> alt text returned by the model
alttexts = {}

for i in range(1, 3):
    for j in range(1, 7):
        for k in range(1, 101):
            # k <= 50 is the "a" rendition of a prompt, k > 50 the "b" rendition.
            version_id = 'a' if k <= 50 else 'b'
            # File names number prompts 1..50; k % 50 == 0 stands for prompt 50.
            filename = f"{i}_{j}_{k % 50 or 50}_{version_id}.png"

            # Build the path from the file name so each iteration reads its
            # own image rather than one fixed path.
            image_path = os.path.join(GPT_IMAGES_DIR, filename)

            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image not found: {image_path}")

            image_b64 = encode_image(image_path)
            image_filename = os.path.basename(image_path)

            try:
                # Real UTC so the "Z" suffix and the "timestamp_utc" label are
                # truthful and match the error path.
                start_ts = datetime.utcnow().isoformat() + "Z"
                print(f"Calling the api for {filename}")
                resp = client.chat.completions.create(
                    model=GPT_MODEL_NAME,
                    messages=[
                        {"role": "user", "content": [
                            {"type": "text", "text": prompt},
                            # The generated images are PNG files, so declare
                            # them as image/png in the data URL.
                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}}
                        ]}
                    ]
                )

                # content can be None; fall back to "" so slicing below is safe.
                output = resp.choices[0].message.content or ""
                usage_info = resp.usage
                alttexts.setdefault(filename, output)
                print(output[:80])

                # Reset on every image so a response without usage data logs
                # None instead of raising NameError or reusing stale counts.
                input_tokens = output_tokens = total_tokens = None
                if usage_info:
                    input_tokens = getattr(usage_info, "prompt_tokens", None)
                    output_tokens = getattr(usage_info, "completion_tokens", None)
                    total_tokens = getattr(usage_info, "total_tokens", None)

                # append log entry for successful call
                log_entry = (
                    f"timestamp_utc: {start_ts} | status: success | model: {GPT_MODEL_NAME} | "
                    f"image: {image_filename} | prompt_excerpt: {prompt.strip()[:80]!r}... | "
                    f"alt text: {output.strip()[:120]!r}... | "
                    f"input_tokens: {input_tokens} | output_tokens: {output_tokens} | total_tokens: {total_tokens}"
                )
                append_alttext_log(log_entry)
                print(f"Appended usage to log: {ALTTEXT_LOG_PATH}")

            except Exception as e:
                # Log the failure and move on to the next image.
                err_ts = datetime.utcnow().isoformat() + "Z"
                tb = traceback.format_exc()
                err_entry = (
                    f"timestamp_utc: {err_ts} | status: error | model: {GPT_MODEL_NAME} | "
                    f"image: {image_filename} | prompt_excerpt: {prompt.strip()[:80]!r}... | "
                    f"error: {str(e)}"
                )
                append_alttext_log(err_entry)
                append_alttext_log("traceback_start")
                for line in tb.splitlines():
                    append_alttext_log(line)
                append_alttext_log("traceback_end")

                print("Request failed. Details:")
                traceback.print_exc()


In [None]:
import pandas as pd

# Convert the GPT-4.1 results to a DataFrame. The captioning loop stores them
# in `alttexts`; `captions` is only assigned by the Claude and Gemini cells, so
# reading it here would fail or export another model's output.
df = pd.DataFrame(list(alttexts.items()), columns=['Filename', 'Alt Text'])

# Save to CSV (".csv" suffix matches alttext_claude.csv / alttext_gemini.csv)
output_csv = '/your/path/here/alttext_gpt.csv'
df.to_csv(output_csv, index=False)

# Report the path that was actually written.
print(f"✅ CSV file saved as '{output_csv}'")


Claude

In [None]:
# Claude Sonnet 4.5
!pip install -q anthropic

from google.colab import drive, userdata
import os, base64, traceback, time
import pandas as pd
from datetime import datetime

# Anthropic client
import anthropic

# Read the Anthropic key from Colab secrets.
raw_key = userdata.get("ANTHROPIC_API_KEY")
if not raw_key:
    raise RuntimeError("ANTHROPIC_API_KEY not found.")

# Trim surrounding whitespace, then drop any remaining CR/LF characters.
api_key = raw_key.strip()
for _line_break in ("\n", "\r"):
    api_key = api_key.replace(_line_break, "")
os.environ["ANTHROPIC_API_KEY"] = api_key
_key_preview = api_key[:8] + "..."
print("Anthropic API key loaded (first 8 chars):", _key_preview)

# init client with explicit api_key
client = anthropic.Anthropic(api_key=api_key)

drive.mount('/content/drive')

# ------------------ CONFIG ----------------------------
ALTTEXT_LOG_PATH = "/your/path/here"   # log file appended to by append_alttext_log
OUTPUT_CSV = "alttext_claude.csv"      # CSV written after the captioning loop
IMAGES_DIR = "images_generated"        # folder the images are read from
MODEL_NAME = "claude-sonnet-4-5"
# Instruction text sent with every image.
PROMPT = '''You are an accessibility expert writing alternative text (alt text) for blind and low vision people.

                  Please describe the contents of the photo clearly and concisely. Focus on the aspects that are visually important and meaningful.

                  Keep the alt text at 100 words or less.  It should communicate the same information as the visual content, without adding anything extra.

                  Reply with only the alt text and nothing else.

                  '''

def append_alttext_log(entry: str):
    """Append ``entry`` followed by a newline to ``ALTTEXT_LOG_PATH``.

    Best-effort: any exception raised while opening or writing the file is
    caught and reported with ``print`` instead of being propagated.
    """
    try:
        with open(ALTTEXT_LOG_PATH, mode="a", encoding="utf-8") as log_file:
            line = entry + "\n"
            log_file.write(line)
    except Exception as write_error:
        print("Failed to write to log file:", write_error)

def encode_image_base64(image_path):
    """Return ``(base64_text, media_type)`` for the file at ``image_path``.

    The media type is always reported as ``"image/png"``; change it here if
    the images are JPEGs.
    """
    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read())
    media_type = "image/png"  # if your images are jpg change accordingly
    return str(encoded, "utf-8"), media_type

def encode_image_as_base64_from_file(path):
    """Read the file at ``path`` and return its bytes as base64 text."""
    with open(path, "rb") as source:
        # standard base64 encoding (no newlines)
        return str(base64.b64encode(source.read()), "utf-8")

def _response_text(resp):
    """Return the text carried by ``resp.content``.

    Accepts a plain-string ``content`` or a list of blocks given either as
    dicts or as objects with ``type``/``text`` attributes; the non-empty texts
    of all ``"text"`` blocks are joined with newlines. Returns "" when no text
    is found.
    """
    content = getattr(resp, "content", None)
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    texts = []
    for block in content:
        if isinstance(block, dict):
            if block.get("type") == "text":
                texts.append(block.get("text", ""))
        elif getattr(block, "type", None) == "text":
            texts.append(getattr(block, "text", ""))
    return "\n".join(t for t in texts if t)


def _usage_tokens(resp):
    """Return ``(input_tokens, output_tokens)`` from ``resp.usage``.

    ``usage`` may be a dict or an object with attributes; reading only the
    dict form would always log None for attribute-style usage objects.
    Missing values come back as None.
    """
    usage = getattr(resp, "usage", None)
    if usage is None:
        return None, None
    if isinstance(usage, dict):
        return usage.get("input_tokens"), usage.get("output_tokens")
    return getattr(usage, "input_tokens", None), getattr(usage, "output_tokens", None)


# filename -> caption returned by the model
captions = {}

# Number of images at the start of the iteration order to skip (presumably
# already captioned in an earlier run - TODO confirm). With k starting at 1,
# skipping 84 makes image k=85 of the first (i, j) pair the first one
# processed, the same starting point as before.
SKIP_FIRST_N = 84
count = 0
# iterate files
for i in range(2, 3):
    for j in range(3, 7):
        # k runs 1..100; starting at 0 would map to the same file as k=50
        # ("*_50_a.png") and caption that image twice per (i, j) pair.
        for k in range(1, 101):
            count += 1
            if count <= SKIP_FIRST_N:
                continue
            version_id = 'a' if k <= 50 else 'b'
            # File names number prompts 1..50; k % 50 == 0 stands for prompt 50.
            filename = f"{i}_{j}_{k % 50 or 50}_{version_id}.png"
            image_path = os.path.join(IMAGES_DIR, filename)
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image not found: {image_path}")

            # encode image to base64
            image_b64 = encode_image_as_base64_from_file(image_path)
            # assume png, change if jpg
            image_media_type = "image/png"

            content_blocks = [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_media_type,
                        "data": image_b64,
                    },
                },
                {
                    "type": "text",
                    "text": PROMPT,
                },
            ]

            try:
                start_ts = datetime.utcnow().isoformat() + "Z"
                print(f"[Claude] Sending image {filename} ...")

                # call messages.create with the base64 content block format
                resp = client.messages.create(
                    model=MODEL_NAME,
                    max_tokens=1024,
                    messages=[
                        {
                            "role": "user",
                            "content": content_blocks
                        }
                    ],
                )

                # Fall back to the response's string form if no text was found.
                output_text = _response_text(resp) or str(resp)

                captions[filename] = output_text.strip()
                print("Caption (preview):", output_text.strip()[:120])

                input_tokens, output_tokens = _usage_tokens(resp)
                total_tokens = (input_tokens or 0) + (output_tokens or 0)

                # success log entry
                log_entry = (
                    f"timestamp_utc: {start_ts} | status: success | provider: anthropic | model: {MODEL_NAME} | "
                    f"image: {filename} | prompt_excerpt: {PROMPT.strip()[:80]!r}... | "
                    f"caption: {output_text.strip()[:200]!r}... | "
                    f"input_tokens: {input_tokens} | output_tokens: {output_tokens} | total_tokens: {total_tokens}"
                )
                append_alttext_log(log_entry)

            except Exception as e:
                # Log the failure and move on to the next image (no retry).
                err_ts = datetime.utcnow().isoformat() + "Z"
                tb = traceback.format_exc()
                err_entry = (
                    f"timestamp_utc: {err_ts} | status: error | provider: anthropic | model: {MODEL_NAME} | "
                    f"image: {filename} | prompt_excerpt: {PROMPT.strip()[:80]!r}... | error: {str(e)}"
                )
                append_alttext_log(err_entry)
                append_alttext_log("traceback_start")
                for line in tb.splitlines():
                    append_alttext_log(line)
                append_alttext_log("traceback_end")
                print("Request failed for image:", filename, " — see log for details.")

# write CSV
df = pd.DataFrame(list(captions.items()), columns=['Filename', 'Alt Text'])
df.to_csv(OUTPUT_CSV, index=False)
print("Saved captions to:", OUTPUT_CSV)

In [None]:
# Show how many captions are currently held in `captions`.
print(len(captions))

Gemini

In [None]:
# Gemini 2.5 Pro

!pip install -q google-genai

from google.colab import drive, userdata
import os, base64, traceback, time
import pandas as pd
from datetime import datetime

# Google GenAI SDK
from google import genai
from google.genai import types
from google.colab import drive, userdata
import os, base64, traceback, time, math, re
import pandas as pd
from datetime import datetime

from google import genai
from google.api_core.exceptions import ResourceExhausted, GoogleAPICallError
import google.genai.errors as genai_errors


# Read the Gemini key from Colab secrets and expose it via the environment.
api_key = userdata.get("GEMINI_API_KEY")
if not api_key:
    raise RuntimeError("GEMINI_API_KEY not found.")
os.environ["GEMINI_API_KEY"] = api_key
_key_preview = api_key[:8] + "..."
print("Gemini API key loaded:", _key_preview)

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

ALTTEXT_LOG_PATH = "/your/path/here"   # log file appended to by append_alttext_log
OUTPUT_CSV = "alttext_gemini.csv"      # CSV written after the captioning loop
IMAGES_DIR = "images_generated"        # folder the images are read from
MODEL_NAME = "gemini-2.5-pro"
# Instruction text sent with every image.
PROMPT = '''You are an accessibility expert writing alternative text (alt text) for blind and low vision people.

                  Please describe the contents of the photo clearly and concisely. Focus on the aspects that are visually important and meaningful.

                  Keep the alt text at 100 words or less.  It should communicate the same information as the visual content, without adding anything extra.

                  Reply with only the alt text and nothing else.

                  '''

def append_alttext_log(entry: str):
    """Append ``entry`` followed by a newline to ``ALTTEXT_LOG_PATH``.

    Best-effort: any exception raised while opening or writing the file is
    caught and reported with ``print`` instead of being propagated.
    """
    try:
        with open(ALTTEXT_LOG_PATH, mode="a", encoding="utf-8") as log_file:
            line = entry + "\n"
            log_file.write(line)
    except Exception as write_error:
        print("Failed to write to log file:", write_error)

def read_image_bytes(image_path):
    """Return the raw bytes of the file at ``image_path``."""
    with open(image_path, mode="rb") as image_file:
        payload = image_file.read()
    return payload

# Gemini client created with the API key held in `api_key`.
client = genai.Client(api_key=api_key)

# filename -> caption text, filled in by the captioning loop.
captions = {}

def extract_retry_seconds_from_message(msg: str):
    """Return the retry delay named in ``msg`` as a float, or None.

    Two case-insensitive patterns are tried in order: ``retry in <number>s``
    (e.g. 'Please retry in 24.39s'), then ``Please retry in <number>`` with no
    unit required. The first pattern that matches wins.
    """
    number = r"([0-9]+(?:\.[0-9]+)?)"
    patterns = (
        r"retry in\s*" + number + r"\s*s",
        r"Please retry in\s*" + number,
    )
    for pattern in patterns:
        found = re.search(pattern, msg, flags=re.IGNORECASE)
        if found:
            return float(found.group(1))
    return None

def extract_retry_seconds_from_error(exc):
    """
    Try to extract a retry delay in seconds from an exception.

    Lookup order:
      1. A JSON body taken from ``exc.response.json()`` or, failing that,
         ``exc.response_json``: first its ``details`` entries, then its
         ``message`` text.
      2. The string form of ``exc``.

    Every step is wrapped in a broad ``try``/``except``, so malformed payloads
    fall through to the next step instead of raising.

    Returns the delay as a float, or None when nothing is found.

    NOTE(review): no call to this function appears in the visible part of this
    file; the captioning loop uses extract_retry_seconds_from_message instead.
    """
    try:
        # 1) Locate a JSON body on the exception.
        resp_json = None
        if hasattr(exc, "response") and exc.response is not None:
            try:
                resp_json = exc.response.json()
            except Exception:
                # .json() missing or failing: fall back to a response_json attribute.
                resp_json = getattr(exc, "response_json", None)
        elif hasattr(exc, "response_json"):
            resp_json = exc.response_json
        # If we have a json body, look for 'error' -> 'message' text, or 'details' -> RetryInfo
        if isinstance(resp_json, dict):
            # check structured details for RetryInfo
            # (a non-dict "error" value makes .get raise; the outer except absorbs it)
            details = resp_json.get("error", {}).get("details", []) or resp_json.get("details", [])
            if isinstance(details, list):
                for det in details:
                    # det may be dict; try to find retryDelay key or nested RetryInfo
                    if isinstance(det, dict):
                        # Some providers include a 'retryDelay' directly in detail
                        if "retryDelay" in det:
                            val = det["retryDelay"]
                            # common form "24s" or "24.39342317s"; take the first number
                            m = re.search(r"([0-9]+(?:\.[0-9]+)?)", str(val))
                            if m:
                                return float(m.group(1))
                        # Any string value that contains "retry" (case-insensitive)
                        # and a lowercase "s": return the first number found in it.
                        for v in det.values():
                            if isinstance(v, str) and "retry" in v.lower() and "s" in v:
                                m = re.search(r"([0-9]+(?:\.[0-9]+)?)", v)
                                if m:
                                    return float(m.group(1))
                        # some structures: det.get('@type') ... det.get('retryDelay')
                        # NOTE(review): appears redundant - the same regex was already
                        # applied to det["retryDelay"] at the top of this loop body.
                        if "@type" in det and "RetryInfo" in det.get("@type", ""):
                            # direct retryDelay field
                            if "retryDelay" in det:
                                m = re.search(r"([0-9]+(?:\.[0-9]+)?)", str(det["retryDelay"]))
                                if m:
                                    return float(m.group(1))
            # fallback: try message field
            msg = resp_json.get("error", {}).get("message") or resp_json.get("message")
            if isinstance(msg, str):
                m = re.search(r"retry in\s*([0-9]+(?:\.[0-9]+)?)\s*s", msg, flags=re.IGNORECASE)
                if m:
                    return float(m.group(1))
                # also look for "Please retry in 24s" variant
                # NOTE(review): any text matching this pattern also matches the one above.
                m2 = re.search(r"Please retry in\s*([0-9]+(?:\.[0-9]+)?)\s*s", msg, flags=re.IGNORECASE)
                if m2:
                    return float(m2.group(1))
    except Exception:
        # Best-effort: ignore any problem with the JSON body and try the string form.
        pass

    # 2) Search the exception's string representation.
    try:
        s = str(exc)
        m = re.search(r"retry in\s*([0-9]+(?:\.[0-9]+)?)\s*s", s, flags=re.IGNORECASE)
        if m:
            return float(m.group(1))
        m2 = re.search(r"Please retry in\s*([0-9]+(?:\.[0-9]+)?)\s*s", s, flags=re.IGNORECASE)
        if m2:
            return float(m2.group(1))
    except Exception:
        pass

    # Nothing usable was found.
    return None

# Retry policy. Attempts are capped so a persistent failure (invalid request,
# bad key, ...) cannot loop forever on a single image.
MAX_ATTEMPTS_PER_IMAGE = 10
ERROR_RETRY_DELAY_SECONDS = 5            # pause after a non-rate-limit error
DEFAULT_RATE_LIMIT_DELAY_SECONDS = 30.0  # used when the error names no delay


def _log_gemini_failure(status, filename, exc):
    """Write an error entry plus the current traceback to the alt-text log.

    Must be called from inside an ``except`` block so that
    ``traceback.format_exc()`` sees the exception being handled.
    """
    err_ts = datetime.utcnow().isoformat() + "Z"
    tb = traceback.format_exc()
    append_alttext_log(f"timestamp_utc: {err_ts} | status: {status} | image: {filename} | error: {exc}")
    append_alttext_log("traceback_start")
    for line in tb.splitlines():
        append_alttext_log(line)
    append_alttext_log("traceback_end")


for i in range(1, 3):
    for j in range(1, 7):
        for k in range(1, 101):
            # k <= 50 is the "a" rendition of a prompt, k > 50 the "b" rendition.
            version_id = 'a' if k <= 50 else 'b'
            # File names number prompts 1..50; k % 50 == 0 stands for prompt 50.
            filename = f"{i}_{j}_{k % 50 or 50}_{version_id}.png"

            image_path = os.path.join(IMAGES_DIR, filename)
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image not found: {image_path}")

            image_bytes = read_image_bytes(image_path)
            image_part = genai.types.Part.from_bytes(data=image_bytes, mime_type="image/png")
            contents = [image_part, PROMPT]

            # Retry loop per image (bounded).
            for attempt in range(1, MAX_ATTEMPTS_PER_IMAGE + 1):
                try:
                    start_ts = datetime.utcnow().isoformat() + "Z"
                    print(f"[Gemini] Generating caption for {filename}...")

                    response = client.models.generate_content(
                        model=MODEL_NAME,
                        contents=contents
                    )
                    output_text = getattr(response, "text", None)
                    if output_text is None:
                        output_text = str(response)

                    captions[filename] = output_text.strip()
                    print("Caption:", output_text.strip()[:100], "...")
                    log_entry = (
                        f"timestamp_utc: {start_ts} | status: success | provider: google | model: {MODEL_NAME} | "
                        f"image: {filename} | caption: {output_text.strip()[:150]!r}"
                    )
                    append_alttext_log(log_entry)
                    break  # Success → exit retry loop

                except (ResourceExhausted, genai_errors.ClientError) as e:
                    print(e)
                    retry_secs = extract_retry_seconds_from_message(str(e))
                    if retry_secs is None:
                        retry_secs = DEFAULT_RATE_LIMIT_DELAY_SECONDS
                    delay = math.ceil(retry_secs)
                    print(f"[Gemini] Rate limit hit. Sleeping for {delay}s (server suggested {retry_secs:.2f}s)...")
                    log_entry = (
                        f"timestamp_utc: {datetime.utcnow().isoformat()}Z | status: rate_limit | provider: google | "
                        f"model: {MODEL_NAME} | image: {filename} | sleep_seconds: {delay}"
                    )
                    append_alttext_log(log_entry)

                except GoogleAPICallError as e:
                    print(e)
                    _log_gemini_failure("api_error", filename, e)
                    delay = ERROR_RETRY_DELAY_SECONDS
                    # Message reports the delay that is actually used.
                    print(f"[Gemini] API error for {filename}, retrying in {delay}s...")

                except Exception as e:
                    print(e)
                    _log_gemini_failure("error", filename, e)
                    delay = ERROR_RETRY_DELAY_SECONDS
                    print(f"[Gemini] Unexpected error on {filename}, retrying in {delay}s...")

                # Reached only after a failed attempt; skip the pause when no
                # attempts are left.
                if attempt < MAX_ATTEMPTS_PER_IMAGE:
                    time.sleep(delay)
            else:
                # Every attempt failed: record it and move on, leaving this
                # image out of `captions`.
                append_alttext_log(
                    f"timestamp_utc: {datetime.utcnow().isoformat()}Z | status: gave_up | provider: google | "
                    f"model: {MODEL_NAME} | image: {filename} | attempts: {MAX_ATTEMPTS_PER_IMAGE}"
                )
                print(f"[Gemini] Giving up on {filename} after {MAX_ATTEMPTS_PER_IMAGE} attempts.")


# Save results
df = pd.DataFrame(list(captions.items()), columns=['Filename', 'Caption'])
df.to_csv(OUTPUT_CSV, index=False)
print(f"\nSaved {len(df)} captions → {OUTPUT_CSV}")