In [None]:
%pip install eyepop==3.12.0

In [None]:
import getpass


# Prompt for the credentials interactively so they are never hardcoded in the
# notebook. Surrounding whitespace is stripped because values pasted from a
# dashboard or terminal often carry a stray space or newline, which would
# otherwise be passed on verbatim to the SDK.
EYEPOP_ACCOUNT_ID = input("Enter your Account UUID: ").strip()
# getpass prompts without echoing, so the key does not end up in the cell output.
EYEPOP_API_KEY = getpass.getpass('Enter your API KEY: ').strip()

In [None]:
# Prefix prepended to the ability name when it is defined and to the ability
# alias referenced at inference time. Replace the placeholder before running
# the remaining cells.
NAMESPACE_PREFIX="XXXXXXXXXXX" # Add your namespace-prefix here

### Define Your Ability

In [None]:
from eyepop import EyePopSdk
from eyepop.data.data_types import InferRuntimeConfig, VlmAbilityGroupCreate, VlmAbilityCreate, TransformInto
from eyepop.worker.worker_types import CropForward, ForwardComponent, FullForward, InferenceComponent, Pop
import json


# Fully qualified name of the ability, built from the configured namespace prefix.
describe_ability_name = f"{NAMESPACE_PREFIX}.image-describe.describe-img"

# Prompt passed to the ability as-is. The indentation and blank lines inside the
# literal are part of the string, so they are kept exactly as written.
describe_prompt = """
          You are given a single image.

          Your task is to produce a factual description of what is visually observable.

          Write EXACTLY 100 words.

          ----------------------------------------
          INSTRUCTIONS:

          1. Describe only what is visible in the image.
          2. Do NOT infer intent, emotions, identity, relationships, or backstory.
          3. Do NOT speculate about events outside the frame.
          4. Do NOT add context that is not visually present.
          5. Do NOT mention camera quality, metadata, or assumptions.
          6. Use clear, neutral, objective language.
          7. Use present tense.
          8. Avoid repetition.
          9. No bullet points.
          10. No introduction or conclusion.

          ----------------------------------------
          CONTENT GUIDELINES:

          Include when visible:
          - People, animals, or objects
          - Clothing and appearance (without guessing identity)
          - Actions or poses
          - Spatial layout and depth
          - Environment or setting
          - Lighting and shadows
          - Colors and textures
          - Visible text or numbers
          - Weather or time-of-day indicators if visually clear

          ----------------------------------------
          STRICT OUTPUT RULES:

          - Output exactly 100 words.
          - Do not include a word count.
          - Do not include commentary.
          - Do not use quotation marks unless text appears in the image.
          - If the image is blank or unreadable, output: NO

          ----------------------------------------

          Return only the 100-word description.
        """

# Runtime settings for the ability.
# NOTE(review): 150 new tokens leaves limited headroom for a 100-word answer;
# confirm that descriptions are not truncated in practice.
describe_runtime_config = InferRuntimeConfig(max_new_tokens=150, image_size=512)

# Abilities to register; each entry is created, published and aliased by the
# creation cell that iterates over this list.
ability_prototypes = [
    VlmAbilityCreate(
        name=describe_ability_name,
        description="Describe the given image",
        worker_release="qwen3-instruct",
        text_prompt=describe_prompt,
        transform_into=TransformInto(),
        config=describe_runtime_config,
        is_public=False,
    ),
]



### Create Your Ability

In [None]:
def _register_ability(data_endpoint, prototype):
    """Create, publish and alias one ability from its prototype.

    Issues four calls on ``data_endpoint`` in order: create an ability group
    named after the prototype, create the ability inside that group, publish
    it under the prototype's name, and add a "latest" tag to that alias.

    Args:
        data_endpoint: An open EyePop data endpoint.
        prototype: The ``VlmAbilityCreate`` describing the ability.

    Returns:
        The ability object returned by the final alias call.
    """
    alias = prototype.name
    group = data_endpoint.create_vlm_ability_group(
        VlmAbilityGroupCreate(
            name=alias,
            description=prototype.description,
            default_alias_name=alias,
        )
    )
    created = data_endpoint.create_vlm_ability(
        create=prototype,
        vlm_ability_group_uuid=group.uuid,
    )
    published = data_endpoint.publish_vlm_ability(
        vlm_ability_uuid=created.uuid,
        alias_name=alias,
    )
    return data_endpoint.add_vlm_ability_alias(
        vlm_ability_uuid=published.uuid,
        alias_name=alias,
        tag_name="latest",
    )


# Register every prototype through a single data endpoint session.
with EyePopSdk.dataEndpoint(api_key=EYEPOP_API_KEY, account_id=EYEPOP_ACCOUNT_ID) as endpoint:
    for ability_prototype in ability_prototypes:
        ability = _register_ability(endpoint, ability_prototype)
        print(f"created ability {ability.uuid} with alias entries {ability.alias_entries}")

### Evaluate on a Single Image

In [None]:
from pathlib import Path


# Pop with a single inference component that refers to the ability through the
# "latest" tag of its alias.
pop = Pop(components=[
    InferenceComponent(
        ability=f"{NAMESPACE_PREFIX}.image-describe.describe-img:latest"
    )
])

# Image to run through the ability. The default path is presumably the Colab
# upload location (confirm); point it at any local image when running elsewhere.
sample_img_path = Path("/content/sample_img.jpg")

# Fail fast with an actionable message before a worker endpoint is opened,
# instead of surfacing a missing file from inside the upload call.
if not sample_img_path.is_file():
    raise FileNotFoundError(
        f"Sample image not found at {sample_img_path}; upload an image or "
        "update sample_img_path before running this cell."
    )

with EyePopSdk.workerEndpoint(api_key=EYEPOP_API_KEY) as endpoint:
    endpoint.set_pop(pop)
    job = endpoint.upload(sample_img_path)
    # Keep printing results until predict() returns a falsy value.
    while result := job.predict():
        print(json.dumps(result, indent=2))

print("Done")