In [None]:
%pip install eyepop==3.12.0

In [None]:
import getpass


# Credentials are requested interactively so they are never stored in the notebook.
# Both values are stripped because copy-pasted secrets frequently carry a stray
# leading/trailing space or newline that would otherwise be sent along verbatim.
EYEPOP_ACCOUNT_ID = input("Enter your Account UUID: ").strip()
# getpass keeps the API key out of the rendered cell output.
EYEPOP_API_KEY = getpass.getpass('Enter your API KEY: ').strip()

In [None]:
# Prefix placed in front of every ability name used in this notebook.
# Replace the placeholder below with your own namespace prefix.
NAMESPACE_PREFIX = "XXXXXXXXXXXXX"

In [None]:
from eyepop import EyePopSdk
from eyepop.data.data_types import InferRuntimeConfig, VlmAbilityGroupCreate, VlmAbilityCreate, TransformInto
from eyepop.worker.worker_types import CropForward, ForwardComponent, FullForward, InferenceComponent, Pop
import json


# Prototype definitions for the VLM abilities that the registration cell creates.
# Each entry carries the name, description, worker release, prompt and runtime
# configuration for one ability.
ability_prototypes = [
    VlmAbilityCreate(
        # The evaluation cells reference this exact name with a ":latest" tag,
        # so keep the two in sync when renaming.
        name=f"{NAMESPACE_PREFIX}.image-describe.OCR-Read-Drivers-License",
        description="Extract relevant information from drivers license",
        worker_release="qwen3-instruct",
        # The literal below keeps its leading indentation as part of the prompt
        # text; re-test the ability if you re-indent or reword it.
        text_prompt="""
        You are given an image of a driver's license.

            Your task is to extract structured data that is clearly visible in the image.

            Return ONLY valid JSON.
            Do not include explanation.
            Do not include markdown.
            Do not include commentary.

            ---------------------------------------
            INSTRUCTIONS:

            1. Extract only text that is clearly readable.
            2. Do NOT guess or infer missing values.
            3. If a field is not visible or unreadable, return null.
            4. Preserve capitalization exactly as shown.
            5. Preserve spacing inside fields where applicable.
            6. Do NOT reformat dates unless the format is explicitly clear.
            7. Do NOT fabricate data.
            8. If the image is not a driver's license, return:
            {
              "document_type": "unknown"
            }

            ---------------------------------------
            RETURN THIS EXACT JSON STRUCTURE:

            {
              "document_type": "drivers_license",
              "issuing_state_or_country": null,
              "license_number": null,
              "first_name": null,
              "middle_name": null,
              "last_name": null,
              "date_of_birth": null,
              "issue_date": null,
              "expiration_date": null,
              "address": {
                "street": null,
                "city": null,
                "state": null,
                "postal_code": null
              },
              "sex": null,
              "height": null,
              "weight": null,
              "eye_color": null,
              "hair_color": null,
              "restrictions": null,
              "endorsements": null,
              "class": null
            }

            ---------------------------------------

            If multiple values appear for a field, return the most prominent official value.

            Return only the JSON object.
        """,
        transform_into=TransformInto(),
        config=InferRuntimeConfig(
            # The requested JSON object spans roughly two dozen lines. Once real
            # values (dates, numbers, address) replace the nulls the reply can
            # outgrow a 250-token budget and be cut off mid-object, which yields
            # invalid JSON. 512 leaves headroom for a fully populated response.
            max_new_tokens=512,
            # NOTE(review): small print on a licence may be hard to read at this
            # input size - confirm against real samples before relying on it.
            image_size=512
        ),
        is_public=False
    )
]



In [None]:
# Register every prototype with the EyePop data endpoint. For each prototype the
# calls below run in this order: create a group, create the ability inside that
# group, publish it under an alias, then attach the "latest" tag to that alias.
# NOTE(review): nothing here checks for an existing group/ability of the same
# name, so re-running this cell presumably creates duplicates or fails - confirm
# against the SDK before re-executing.
with EyePopSdk.dataEndpoint(api_key=EYEPOP_API_KEY, account_id=EYEPOP_ACCOUNT_ID) as endpoint:
    for ability_prototype in ability_prototypes:
        # The group reuses the prototype's name and description; the same name
        # is also set as the group's default alias.
        ability_group = endpoint.create_vlm_ability_group(VlmAbilityGroupCreate(
            name=ability_prototype.name,
            description=ability_prototype.description,
            default_alias_name=ability_prototype.name,
        ))
        # Create the ability itself inside the group that was just created.
        ability = endpoint.create_vlm_ability(
            create=ability_prototype,
            vlm_ability_group_uuid=ability_group.uuid,
        )
        # `ability` is rebound to the value returned by each subsequent call, so
        # later steps always use the most recent uuid.
        ability = endpoint.publish_vlm_ability(
            vlm_ability_uuid=ability.uuid,
            alias_name=ability_prototype.name,
        )
        # The evaluation cells address the ability as "<name>:latest"; this call
        # adds the "latest" tag to the alias.
        ability = endpoint.add_vlm_ability_alias(
            vlm_ability_uuid=ability.uuid,
            alias_name=ability_prototype.name,
            tag_name="latest"
        )
        print(f"created ability {ability.uuid} with alias entries {ability.alias_entries}")

### Evaluate on a Single Image

In [None]:
from pathlib import Path


# Pop with a single inference component that runs the registered ability through
# its ":latest" alias tag.
pop = Pop(components=[
    InferenceComponent(
        ability=f"{NAMESPACE_PREFIX}.image-describe.OCR-Read-Drivers-License:latest"
    )
])

# Colab-style location of the test image; point this at your own file.
sample_img_path = Path("/content/sample_img.jpg")

with EyePopSdk.workerEndpoint(api_key=EYEPOP_API_KEY) as endpoint:
    endpoint.set_pop(pop)
    # Hand the location over as a string, the same way the batch evaluation
    # cell calls upload().
    job = endpoint.upload(str(sample_img_path))
    # Keep reading results until predict() returns a falsy value, printing each
    # one as indented JSON.
    while result := job.predict():
        print(json.dumps(result, indent=2))

print("Done")

### Evaluation Flow

In [None]:
from pathlib import Path

# Pop with a single inference component that runs the registered ability through
# its ":latest" alias tag.
pop = Pop(components=[
    InferenceComponent(
        ability=f"{NAMESPACE_PREFIX}.image-describe.OCR-Read-Drivers-License:latest"
    )
])

directory_path = Path("/content/sample_data")
output_path = Path("/content/sample_data/output.json")

# Only regular image files are submitted. The results file is written into the
# very directory being scanned, so without this filter a second run would upload
# output.json (and any other non-image file or sub-directory found there).
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tif", ".tiff"}

# Sorted so that the processing order, and therefore the key order in the output
# file, is the same on every run; iterdir() yields entries in arbitrary order.
image_paths = sorted(
    path
    for path in directory_path.iterdir()
    if path.is_file() and path.suffix.lower() in IMAGE_SUFFIXES
)

# Maps each image's file name to the list of results read for it.
all_results = {}

with EyePopSdk.workerEndpoint(api_key=EYEPOP_API_KEY) as endpoint:
    endpoint.set_pop(pop)
    for item in image_paths:
        job = endpoint.upload(str(item))
        file_results = []

        # Collect results until predict() returns a falsy value.
        while result := job.predict():
            file_results.append(result)

        all_results[item.name] = file_results

# Persist everything in one JSON document once the endpoint has been closed.
with open(output_path, "w") as f:
    json.dump(all_results, f, indent=2)

print("Done")