## Extract Information from an Image
Upload any image (invoice, form, screenshot, etc.)

In [1]:
import os
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv(override=True, dotenv_path="../.env")
my_api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI(api_key=my_api_key)

import base64

image_path1 = "data/invoice.png" 
image_path2 = "data/office_lease.png"


In [2]:

# Encode image as base64
with open(image_path1, "rb") as f:
    image1_base64 = base64.b64encode(f.read()).decode("utf-8")
# Code explanation:
    # Converts the binary data into a Base64-encoded string.
    # Base64 is a way of representing binary data (like images) using only text characters (A–Z, a–z, 0–9, +, /).
    # This is necessary because APIs (like OpenAI’s) can only transmit text, not raw binary files.

mime_type = "image/png"

#Request extraction from image
response = client.chat.completions.create(
    model="gpt-5-nano",
    messages=[
        {
            "role": "system",
            "content": "You extract and summarize information from invoices or forms."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": '''
                 Extract as many fields, such as Invoice from or Company, Invoice, Invoice information - "
                 number, data, due date etc, Invoice product list from this image as a JSON object:
                 '''},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image1_base64}"}}
            ]
        }
    ],
)

print("🧾 Extracted Info from Image:\n")
print(response.choices[0].message.content)

🧾 Extracted Info from Image:

{
  "invoice": {
    "number": "20281",
    "date": "2017-03-02",
    "due_date": "2017-03-16",
    "delivery_date": "2017-02-20",
    "payment_terms": "14 days net",
    "our_reference": "Marc Miller",
    "your_reference": "James Anderson",
    "buyer_order_number": "1234",
    "penalty_interest_percent": 7.5,
    "customer_business_id": "1212121-2",
    "customer_number": "2",
    "seller": {
      "name": "Spiceimporter Ltd.",
      "address": [
        "Kaisaniemenkatu 6A",
        "00100 Helsinki"
      ]
    },
    "buyer": {
      "name": "Bering Catering",
      "contact": "Marc Miller",
      "address": [
        "Bulevardi 15",
        "00180 Helsinki"
      ]
    },
    "items": [
      {
        "product_no": "18",
        "description": "Curry, 280g",
        "unit_price_eur": 4.50,
        "qty": 50,
        "vat_percent": 19,
        "total_eur": 225.00
      },
      {
        "product_no": "16",
        "description": "Stubb's Beef Spice 

In [None]:

# Encode image as base64
with open(image_path2, "rb") as f:
    image2_base64 = base64.b64encode(f.read()).decode("utf-8")

mime_type = "image/png"

# Request extraction from image
response = client.chat.completions.create(
    model="gpt-5-nano",
    messages=[
        {
            "role": "system",
            "content": "Extract and summarize information from this property for sale flyer."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract key fields from this image, such as property title, details etc as a JSON object:"},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image2_base64}"}}
            ]
        }
    ],
)

print("Extracted Info from Image:\n")
print(response.choices[0].message.content)

In [5]:
import ollama
import base64

# --- Load and encode the image ---
image_path1 = "data/invoice.png"
with open(image_path1, "rb") as f:
    image1_base64 = base64.b64encode(f.read()).decode("utf-8")

# --- Prepare and send request ---
response = ollama.chat(
    model="llama3",
    messages=[
        {
            "role": "system",
            "content": "You extract and summarize structured information from invoices or forms."
        },
        {
            "role": "user",
            "content": (
                "Here is a Base64-encoded image of an invoice. "
                "Extract all available fields such as company name, invoice number, "
                "invoice date, due date, and product or line items. "
                "Return the result strictly as a JSON object.\n\n"
                f"Image (base64): {image1_base64}..."  # truncated for length
            ),
        },
    ],
)

# --- Display result ---
print("Extracted Info from Image:\n")
if "message" in response and "content" in response["message"]:
    print(response["message"]["content"])
else:
    print(response)


Extracted Info from Image:

The Base64-encoded image of an invoice!

To extract the fields, I'll need to decode the Base64 and analyze the contents. Please note that this is a generic extraction attempt, as the actual format may vary.

After decoding the Base64, we get:

```
<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="http://schemas.example.com/invoice/2019">
  <Company>
    <Name>Example Inc.</Name>
    <Address>
      <Street>123 Main St</Street>
      <City>New York</City>
      <State>NY</State>
      <Zip>10001</Zip>
    </Address>
  </Company>
  <Customer>
    <Name>Jane Doe</Name>
    <Address>
      <Street>456 Elm St</Street>
      <City>New York</City>
      <State>NY</State>
      <Zip>10002</Zip>
    </Address>
  </Customer>
  <Items>
    <Item>
      <Description>Widget A</Description>
      <Quantity>2</Quantity>
      <UnitPrice>10.00</UnitPrice>
      <Total>20.00</Total>
    </Item>
    <Item>
      <Description>Widget B</Description>
      <Quantity>1</Quan