# Generate image alt text with multiple LLM providers through a Gradio UI

In [23]:
import mimetypes
import os

from dotenv import load_dotenv
from openai import OpenAI

In [8]:
import gradio as gr  # oh yeah!

In [None]:
# Read the provider credentials from the environment (after loading .env).
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')

# Report which keys were found. Only a short leading slice of each key is
# printed, never the whole value.
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)

print(
    f"Anthropic API Key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API Key not set"
)

print(
    f"Google API Key exists and begins {google_api_key[:8]}"
    if google_api_key
    else "Google API Key not set"
)

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-
Google API Key exists and begins AIzaSyAM


# Reusable function to send image file to OpenAI computer vision endpoint

In [None]:
# vision_gpt reads the image from the local file system and uploads it to OpenAI

from openai import OpenAI
# Client used only for OpenAI calls; the distinct name keeps it from being
# overwritten by the Anthropic client created in a later cell.
openai_client = OpenAI()

# Instruction text that accompanies every image sent for alt-text generation.
system_prompt = (
    "You are an expert in web accessibility whose job it is to write alt text "
    "for images that will be added to different websites, including sites for "
    "big corporations. When you write alt text you are looking for a balance "
    "between brevity and specificity. You should describe the crucial features "
    "of the image and omit the details that are not as important. "
    "Aim for a limit of 500 characters"
)

def vision_gpt(file_path, prompt=system_prompt):
    """Upload a local image to OpenAI and return the generated alt text.

    Args:
        file_path: Path of the image on the local file system.
        prompt: Instruction text sent together with the image. The default is
            bound when this ``def`` executes, so a later cell that reassigns
            the global ``system_prompt`` does not change what this function
            sends.

    Returns:
        The ``output_text`` of the model response.
    """
    # The context manager closes the file handle as soon as the upload
    # finishes, including when the upload raises.
    with open(file_path, "rb") as image_file:
        uploaded = openai_client.files.create(
            file=image_file,
            purpose="user_data",
        )

    response = openai_client.responses.create(
        model="gpt-5",
        input=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_image",
                        "file_id": uploaded.id,
                        "detail": "high"
                    },
                    {
                        "type": "input_text",
                        "text": prompt,
                    },
                ]
            }
        ]
    )
    return response.output_text

# Reusable function to send image file to Anthropic computer vision endpoint

In [None]:
import os
import anthropic

# Client used only for Anthropic calls; the distinct name keeps it from
# overwriting openai_client.
anthropic_client = anthropic.Anthropic()

# Instruction text that accompanies every image sent for alt-text generation.
system_prompt = (
    "You are an expert in web accessibility whose job it is to write alt text "
    "for images that will be added to websites, including sites for "
    "big corporations. When you write alt text you are looking for a balance "
    "between brevity and specificity. You should describe the crucial features "
    "of the image and omit the details that are not as important. "
    "Aim for a limit of 500 characters"
)


def vision_anthropic(file_path, prompt=system_prompt):
    """Upload a local image to Anthropic and return the model's reply content.

    Args:
        file_path: Path of the image on the local file system.
        prompt: Instruction text sent together with the image. The default is
            bound when this ``def`` executes, so re-running another cell that
            reassigns the global ``system_prompt`` does not change what this
            function sends.

    Returns:
        ``message.content`` exactly as returned by the API (a list of content
        blocks, which ``stream_model`` flattens to text for display).
    """
    # Only the basename is sent as the upload's file name.
    base_name = os.path.basename(file_path)

    # Label the upload with the type implied by its extension so PNG/GIF/WebP
    # files are not declared as JPEG; unknown extensions keep the JPEG label.
    mime_type = mimetypes.guess_type(base_name)[0] or "image/jpeg"

    with open(file_path, "rb") as f:
        file_upload = anthropic_client.beta.files.upload(
            file=(base_name, f, mime_type)
        )

    message = anthropic_client.beta.messages.create(
        model="claude-sonnet-4-5",
        max_tokens=1024,
        betas=["files-api-2025-04-14"],
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "file",
                            "file_id": file_upload.id
                        }
                    },
                    {
                        "type": "text",
                        "text": prompt
                    }
                ]
            }
        ],
    )
    return message.content

# Helper function to route the requests to different models/providers

In [42]:
def _resolve_image_path(file_ref):
    """Turn the value handed over by the file input into a clean path string."""
    if isinstance(file_ref, os.PathLike):
        file_ref = os.fspath(file_ref)
    elif not isinstance(file_ref, str):
        # Some file inputs pass a temp-file wrapper whose path is in ``.name``
        # rather than a plain string -- TODO confirm for the Gradio version in use.
        file_ref = getattr(file_ref, "name", None)
    if not isinstance(file_ref, str) or not file_ref.strip():
        raise ValueError("No image file provided")
    return file_ref.strip()


def stream_model(file_path, model):
    """Send the selected image to the chosen provider and return its alt text.

    Args:
        file_path: Location of the image: a path string, an ``os.PathLike``,
            or an object exposing the path as ``.name``.
        model: Provider label, either ``"GPT"`` or ``"Claude"``.

    Returns:
        The alt text as a single string.

    Raises:
        ValueError: If no file was supplied or ``model`` is not recognised.
    """
    file_path = _resolve_image_path(file_path)
    if model == "GPT":
        result = vision_gpt(file_path)
    elif model == "Claude":
        result = vision_anthropic(file_path)
    else:
        raise ValueError("Unknown model")
    # Anthropic returns message.content (list of blocks); extract text for display
    if isinstance(result, list):
        result = "\n".join(
            getattr(block, "text", str(block)) for block in result
        )
    return result

# Gradio UI to select file path and model

In [9]:
# Input widgets: the image to describe and the provider that should describe it.
# file_types limits the picker to images, since both vision helpers only
# handle image uploads.
file_input = gr.File(label="Select image file", file_types=["image"])
model_selector = gr.Dropdown(["GPT", "Claude"], label="Select model", value="GPT")

# Output widget: tall enough to show the full alt text without scrolling.
vision_output = gr.Textbox(label="Alt text", lines=15)

# stream_model must already be defined (run its cell first) before this cell runs.
view = gr.Interface(
    fn=stream_model,
    title="LLMs",
    inputs=[file_input, model_selector],
    outputs=[vision_output],
)
view.launch()

NameError: name 'stream_model' is not defined

# Next Steps
* Submit the notebook.