In [None]:
import openai
import base64
import json
from dotenv import load_dotenv
import os

load_dotenv()
key = os.getenv("OPEN_API_KEY")

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

image_path = "WhatsApp Image 2025-04-05 at 13.19.46_a48c2e13.jpeg"
base64_image = encode_image(image_path)
json_schema = {
    "type": "object",
    "properties": {
        "keywords": {
            "type": "array",
            "description": "List of keywords that describe the content of the image.",
            "items": {"type": "string"},
            "minItems": 5,
            "maxItems": 5
        }
    },
    "required": ["keywords"],
    "additionalProperties": False
}

prompt = (
    "I am sending an image of an incident. Find out whether the image indicates an emergency or not and "
    "provide exactly five keywords describing the situation. Choose exclusively from keywords such as "
    "'accident', 'not accident', 'emergency', 'fire', 'flood', etc. Return only a JSON object following "
    "the schema provided."
)

client = openai.OpenAI(api_key=key)

response = client.responses.create(
    model="gpt-4o",
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": prompt},
                {"type": "input_image", "image_url": f"data:image/jpeg;base64,{base64_image}"}
            ]
        }
    ],
    text={
        "format": {
            "name":"keywords",
            "type": "json_schema",
            "schema": {
                "type": "object",
                "strict": True,
                "name": "keywords",
                "properties": {
                    "keywords": {
                        "type": "array",
                        "items": {"type": "string"}
                    }
                },
                "required": ["keywords"],
                "additionalProperties": False
            }
        }
    }
)

print(response)



Response(id='resp_67f1811d087c8192972cad418d2ae9a30ff11436de15efa4', created_at=1743880477.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4o-2024-08-06', object='response', output=[ResponseOutputMessage(id='msg_67f1811e5bcc81929ad5b49f60eb55c70ff11436de15efa4', content=[ResponseOutputText(annotations=[], text='{"keywords":["fire","emergency","accident","panic","evacuation"]}', type='output_text')], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, max_output_tokens=None, previous_response_id=None, reasoning=Reasoning(effort=None, generate_summary=None), status='completed', text=ResponseTextConfig(format=ResponseFormatTextJSONSchemaConfig(name='keywords', schema_={'type': 'object', 'strict': True, 'name': 'keywords', 'properties': {'keywords': {'type': 'array', 'items': {'type': 'string'}}}, 'required': ['keywords'], 'additionalProperties': False}, type='json_sch

In [6]:
json.loads(response.output_text) # Convert the response to JSON format

{'keywords': ['fire', 'emergency', 'accident', 'panic', 'evacuation']}