# Delivery OCR

## Define Schema

In [38]:
from pydantic import BaseModel
from typing import List, Optional, Union


# One delivery row extracted from the summary screenshot.
# Every field defaults to None, so a value that is not returned stays None.
# (Comments are used instead of a docstring so the model's generated schema is unchanged.)
class DeliveryItem(BaseModel):
    # Delivery identifier, kept as a string (e.g. '2036812712').
    delivery_id: Optional[str] = None
    # Short order code as read from the screen; sample outputs show it with and without a '#' prefix.
    code: Optional[str] = None
    # Completion time as a string; the extraction prompt asks for "HH:MM".
    finished_at: Optional[str] = None


# Top-level structure requested from the model for one delivery summary screenshot.
# Every field defaults to None, so a value that is not returned stays None.
# (Comments are used instead of a docstring so the model's generated schema is unchanged.)
class DeliverySummary(BaseModel):
    # Date as a string; the extraction prompt asks for "YYYY-MM-DD".
    date: Optional[str] = None
    # Distance value; presumably kilometres, going by the field name.
    distance_km: Optional[float] = None
    # Delivery count read from the screen; sample outputs show it can exceed len(delivery_items).
    deliveries: Optional[int] = None
    # Tips amount; NOTE(review): "kd" presumably denotes the currency - confirm.
    tips_kd: Optional[float] = None
    # Collected amount; same unit assumption as tips_kd.
    collected_kd: Optional[float] = None
    # Individual delivery rows returned by the model.
    delivery_items: Optional[List[DeliveryItem]] = None


## Setup Env

In [None]:
import os

# Provide the Gemini API key through the environment.
# A key that is already exported is left untouched; the "xxxx" placeholder is
# only written when the variable is unset or empty, so re-running this cell
# can no longer overwrite a real key. Never commit a real key in its place.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = "xxxx"



## Call LLM

In [40]:
from PIL import Image
from google import genai

# Module-level Gemini client used by the extraction function below.
# Indexing os.environ directly raises KeyError if GOOGLE_API_KEY is not set.
client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])


In [None]:
# Instruction text sent to the model together with the screenshot.
# The JSON shape described here mirrors the DeliverySummary / DeliveryItem fields.
prompt = """You are provided with an image of a delivery summary screen from a mobile application.

Extract the following structured information from the image and return it as a JSON object:

{
  "date": string (format: "YYYY-MM-DD"),
  "distance_km": float,
  "deliveries": integer,
  "tips_kd": float | null,
  "collected_kd": float | null,
  "delivery_items": [
    {
      "delivery_id": string,
      "code": string,
      "finished_at": string (format: "HH:MM")
    },
    ...
  ]
}

Rules:
- Use only the data that is clearly visible in the image.
- If a value is missing or unclear, return it as null.
- Return only the JSON — no comments, no explanations, no markdown.
"""
def load_image(image_path):
    """Open the image at ``image_path`` with Pillow and return the image object.

    The path is handed straight to ``Image.open``; any error it raises
    propagates to the caller unchanged.
    """
    opened_image = Image.open(image_path)
    return opened_image


def extract_delivery_data(image_path, model="gemini-1.5-pro"):
    """Send a delivery-summary screenshot to Gemini and return the raw response.

    Args:
        image_path: Path of the screenshot to analyse; opened via ``load_image``.
        model: Gemini model id to call. Defaults to the id this notebook has
            always used, so existing single-argument callers are unaffected.
            NOTE(review): confirm this model id is still served by the API.

    Returns:
        The response object from ``client.models.generate_content``. Because
        ``response_schema`` is ``DeliverySummary``, callers read the structured
        result from its ``.parsed`` attribute.
    """
    # Pillow opens files lazily and keeps the file handle until the image is
    # closed; the context manager releases it once the request has been made
    # instead of leaking one handle per processed screenshot.
    with load_image(image_path) as image:
        response = client.models.generate_content(
            model=model,
            contents=[prompt, image],
            config={
                "response_schema": DeliverySummary,
                "response_mime_type": "application/json",
            },
        )
    return response


In [31]:
image_path = "sample2.jpeg"  # Path to your image file
# Despite its name, json_result is the response object returned by
# extract_delivery_data, not a JSON string.
json_result = extract_delivery_data(image_path)
# Last expression of the cell: displays the response's `.parsed` attribute.
json_result.parsed

DeliverySummary(date='2025-03-18', distance_km=94.459, deliveries=13, tips_kd=0.35, collected_kd=13.3, delivery_items=[DeliveryItem(delivery_id='2036812712', code='#9502', finished_at='23:59'), DeliveryItem(delivery_id='2036632395', code='#7876', finished_at='22:23'), DeliveryItem(delivery_id='2036538931', code='#7745', finished_at='21:06'), DeliveryItem(delivery_id='2036431750', code='#7612', finished_at='19:54'), DeliveryItem(delivery_id='2036375097', code='#7529', finished_at='18:53'), DeliveryItem(delivery_id='2036286407', code='#7463', finished_at=None)])

In [None]:
image_path = "sample1.jpeg"  # Path to your image file
# Despite its name, json_result is the response object returned by
# extract_delivery_data, not a JSON string.
json_result = extract_delivery_data(image_path)
# Last expression of the cell: displays the response's `.parsed` attribute.
json_result.parsed

DeliverySummary(date='2025-03-15', distance_km=112.482, deliveries=15, tips_kd=None, collected_kd=None, delivery_items=[DeliveryItem(delivery_id='2032394476', code='3680', finished_at='23:14'), DeliveryItem(delivery_id='2032389414', code='3679', finished_at='23:04'), DeliveryItem(delivery_id='2032294813', code='3022', finished_at='22:02'), DeliveryItem(delivery_id='2032195044', code='1060', finished_at='20:53')])