# Azure OpenAI GPT-5 Document Analysis (Vision)
Use GPT-5 vision to extract 1-3 key fields from the images in `../docs`. Update the deployment name and endpoint for your Azure OpenAI resource before running.


## Prerequisites
- Python 3.9+ with the `openai` and `python-dotenv` packages installed.
- Azure OpenAI resource with a **GPT-5** (vision-enabled) deployment.
- Environment variables set: `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_KEY`, and `AZURE_OPENAI_GPT5_DEPLOYMENT` (deployment name).


In [13]:
import base64
import json
import os
from pathlib import Path
from openai import AzureOpenAI
from dotenv import load_dotenv
load_dotenv()

# Connection settings are read from the environment; load_dotenv() above has
# already merged any local .env file into os.environ.
endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")

# AZURE_OPENAI_KEY is the name used in the prerequisites and in the error
# message below; AZURE_API_KEY is still honoured so existing .env files that
# use the older name keep working.
api_key = os.environ.get("AZURE_OPENAI_KEY") or os.environ.get("AZURE_API_KEY")

# Same idea for the deployment: documented name first, legacy name second,
# and finally the "gpt-5" default.
deployment = (
    os.environ.get("AZURE_OPENAI_GPT5_DEPLOYMENT")
    or os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")
    or "gpt-5"
)
api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-10-01-preview")

# Fail fast with a readable message instead of an authentication error on the
# first request.
if not endpoint or not api_key:
    raise ValueError("Please set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_KEY.")

client = AzureOpenAI(
    azure_endpoint=endpoint, api_key=api_key, api_version=api_version
)

# Collect the PNG pages to analyze; sorting keeps the processing order stable
# between runs.
docs_dir = Path("../docs")
docs = sorted(docs_dir.glob("*.png"))
print(f"Found {len(docs)} images in {docs_dir.resolve()}")


Found 15 images in C:\repo\ai-foundry-craftkit\Model_Usecases\ocr\docs


In [14]:
def encode_image(path: Path) -> str:
    """Return a base64 ``data:`` URL for the image stored at *path*.

    The MIME type placed in the URL is picked from the file extension
    (case-insensitive). Extensions that are not recognised fall back to
    ``image/png``, so ``.png`` inputs produce exactly the same URL as before.

    Args:
        path: Location of the image file; a ``Path`` or a plain string.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    mime_by_suffix = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }
    # Normalise so string paths are accepted as well as Path objects.
    image_file = Path(path)
    mime = mime_by_suffix.get(image_file.suffix.lower(), "image/png")
    # Base64 output is pure ASCII, so decoding cannot fail.
    b64 = base64.b64encode(image_file.read_bytes()).decode("ascii")
    return f"data:{mime};base64,{b64}"

def analyze_document(image_path: Path, fields: list[str]) -> dict:
    """Ask the vision deployment to extract *fields* from one document image.

    The image is sent inline as a base64 data URL together with a text prompt
    listing the requested field names, and the model is asked to reply in
    JSON-object mode.

    Args:
        image_path: Image file to analyze.
        fields: Names of the fields the model should return as JSON keys.

    Returns:
        The parsed JSON object from the model's reply.

    Raises:
        ValueError: If the reply carries no text content, or if the content
            is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``
            subclass).
    """
    # The prompt must mention "JSON": JSON-object response mode requires it.
    prompt = (
        "Extract the following fields from the document. Return a JSON object "
        "with exactly these keys; use null if a value is missing or unreadable.\n"
        f"Fields: {', '.join(fields)}.\n"
    )

    completion = client.chat.completions.create(
        model=deployment,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "You extract concise values from documents."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": encode_image(image_path)}},
                ],
            },
        ],
    )

    choice = completion.choices[0]
    content = choice.message.content
    # The SDK types `content` as optional; without this guard json.loads(None)
    # would surface as an opaque TypeError.
    if content is None:
        raise ValueError(
            f"Model returned no content for {image_path} "
            f"(finish_reason={choice.finish_reason!r})."
        )
    return json.loads(content)


In [15]:
# Choose the fields you want to capture (1-3).
fields_to_extract = ["DocumentType", "PrimaryName", "Date"]

# Cap on how many images this demo sends to the model; each image is one API
# call, so lower this for a quicker or cheaper run.
max_demo_docs = 13
example_docs = docs[:max_demo_docs]

# Run the extraction per image and keep the file name next to its result so
# the printed JSON can be traced back to the source page.
results = []
for doc_path in example_docs:
    result = analyze_document(doc_path, fields_to_extract)
    results.append({"file": doc_path.name, "extraction": result})

print(json.dumps(results, indent=2))


[
  {
    "file": "cl_23032-12.pdf_0.png",
    "extraction": {
      "DocumentType": "Application for Individual Life Insurance",
      "PrimaryName": "Mark B. Cuban",
      "Date": "10/2015"
    }
  },
  {
    "file": "cl_23032-12.pdf_11.png",
    "extraction": {
      "DocumentType": "Agreement, Authorization and Signatures",
      "PrimaryName": "Batzams Spartacus",
      "Date": "2018-01-16"
    }
  },
  {
    "file": "cl_23032-12.pdf_14.png",
    "extraction": {
      "DocumentType": "Application for Individual Life Insurance",
      "PrimaryName": "Heather A Johnson",
      "Date": null
    }
  },
  {
    "file": "cl_23032-12.pdf_15.png",
    "extraction": {
      "DocumentType": "Life Insurance Plan Information Form",
      "PrimaryName": "Kendra Isham",
      "Date": null
    }
  },
  {
    "file": "cl_23032-12.pdf_16.png",
    "extraction": {
      "DocumentType": "Life insurance application",
      "PrimaryName": null,
      "Date": null
    }
  },
  {
    "file": "cl_23032-1

## Tips
- Adjust `fields_to_extract` per document type.
- For PDFs, rasterize pages (e.g., with `pypdfium2` or `pdf2image`) before calling the model.
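- Use a low temperature (0-0.3) for more deterministic JSON outputs on deployments that accept the parameter; GPT-5 reasoning deployments may reject non-default `temperature` values, so check your model's supported parameters first.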
