<a href="https://colab.research.google.com/github/graphlit/graphlit-samples/blob/main/python/Notebook%20Examples/Graphlit_2024_09_17_Describe_and_Map_Images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Description**

This example shows how to ingest a series of images, use Anthropic Claude 3.5 Sonnet to automatically generate image descriptions, and then display the images on a searchable map.

**Requirements**

Prior to running this notebook, you will need to [sign up](https://docs.graphlit.dev/getting-started/signup) for Graphlit, and [create a project](https://docs.graphlit.dev/getting-started/create-project).

You will need the Graphlit organization ID, preview environment ID and JWT secret from your created project.

Assign these properties as Colab secrets: GRAPHLIT_ORGANIZATION_ID, GRAPHLIT_ENVIRONMENT_ID and GRAPHLIT_JWT_SECRET.


---

Install Graphlit Python client SDK

In [22]:
!pip install --upgrade graphlit-client



Install Folium

In [23]:
!pip install --upgrade folium



Initialize Graphlit

In [24]:
import os
from google.colab import userdata
from graphlit import Graphlit
from graphlit_api import input_types, enums, exceptions

# Copy the three Graphlit credentials from Colab secrets into environment
# variables of the same name. Graphlit() is constructed without arguments,
# so it presumably picks its configuration up from these variables.
os.environ.update({
    secret_name: userdata.get(secret_name)
    for secret_name in (
        'GRAPHLIT_ORGANIZATION_ID',
        'GRAPHLIT_ENVIRONMENT_ID',
        'GRAPHLIT_JWT_SECRET',
    )
})

graphlit = Graphlit()

Define Graphlit helper functions

In [25]:
from typing import List, Optional

# Create specification for Anthropic Sonnet 3.5
async def create_anthropic_specification():
    """Create an extraction specification backed by Anthropic Claude 3.5 Sonnet.

    Returns:
        The ID of the created specification, or None when the Graphlit client
        is unavailable, the response contains no specification, or the request
        fails with a GraphQLClientError (the error text is printed).
    """
    if graphlit.client is None:
        return None

    # Named `specification` rather than `input` so the builtin input(), which
    # the notebook calls when prompting for search text, is not shadowed here.
    specification = input_types.SpecificationInput(
        name="Anthropic Claude Sonnet 3.5",
        type=enums.SpecificationTypes.EXTRACTION,
        serviceType=enums.ModelServiceTypes.ANTHROPIC,
        anthropic=input_types.AnthropicModelPropertiesInput(
            model=enums.AnthropicModels.CLAUDE_3_5_SONNET,
        )
    )

    try:
        response = await graphlit.client.create_specification(specification)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    created = response.create_specification
    return created.id if created is not None else None

async def create_workflow(specification_id: str):
    """Create an "Image Extraction" workflow that uses the given specification.

    The workflow has a single extraction job whose MODEL_IMAGE connector
    references the specification by ID.

    Args:
        specification_id: ID of the specification the extraction job should use.

    Returns:
        The ID of the created workflow, or None when the Graphlit client is
        unavailable, the response contains no workflow, or the request fails
        with a GraphQLClientError (the error text is printed).
    """
    if graphlit.client is None:
        return None

    # Named `workflow` rather than `input` so the builtin input(), which the
    # notebook calls when prompting for search text, is not shadowed here.
    workflow = input_types.WorkflowInput(
        name="Image Extraction",
        extraction=input_types.ExtractionWorkflowStageInput(
            jobs=[
                input_types.ExtractionWorkflowJobInput(
                    connector=input_types.EntityExtractionConnectorInput(
                        type=enums.EntityExtractionServiceTypes.MODEL_IMAGE,
                        modelImage=input_types.ModelImageExtractionPropertiesInput(
                            specification=input_types.EntityReferenceInput(id=specification_id)
                        )
                    )
                )
            ]
        )
    )

    try:
        response = await graphlit.client.create_workflow(workflow)
    except exceptions.GraphQLClientError as e:
        print(str(e))
        return None

    created = response.create_workflow
    return created.id if created is not None else None

async def ingest_uri(uri: str, workflow_id: str):
    """Ingest a single URI through the given workflow.

    Args:
        uri: Location of the content to ingest.
        workflow_id: ID of the workflow to apply during ingestion.

    Returns:
        The ID reported for the ingested content, or None when the Graphlit
        client is unavailable, the response carries no content, or the request
        fails with a GraphQLClientError (the error text is printed).
    """
    client = graphlit.client

    if client is None:
        return

    try:
        # Synchronous mode: the call does not return until the content is ingested
        result = await client.ingest_uri(
            uri=uri,
            workflow=input_types.EntityReferenceInput(id=workflow_id),
            is_synchronous=True,
        )

        ingested = result.ingest_uri

        if ingested is None:
            return None

        return ingested.id
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def query_contents(search_text: Optional[str] = None):
    """Query image contents, optionally narrowed by search text.

    The filter requests a HYBRID search restricted to the IMAGE file type.

    Args:
        search_text: Text to search for; None applies no search text.

    Returns:
        The results of the contents query, or None when the Graphlit client is
        unavailable, the response carries no contents, or the request fails
        with a GraphQLClientError (the error text is printed).
    """
    client = graphlit.client

    if client is None:
        return None

    try:
        image_filter = input_types.ContentFilter(
            search=search_text,
            searchType=enums.SearchTypes.HYBRID,
            fileTypes=[enums.FileTypes.IMAGE],
        )

        response = await client.query_contents(filter=image_filter)

        found = response.contents

        return None if found is None else found.results
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

async def delete_all_specifications():
    """Ask Graphlit to delete all specifications (synchronous request).

    Does nothing when the Graphlit client is unavailable. Always returns None;
    the service response is discarded.
    """
    client = graphlit.client

    if client is not None:
        await client.delete_all_specifications(is_synchronous=True)

async def delete_all_workflows():
    """Ask Graphlit to delete all workflows (synchronous request).

    Does nothing when the Graphlit client is unavailable. Always returns None;
    the service response is discarded.
    """
    client = graphlit.client

    if client is not None:
        await client.delete_all_workflows(is_synchronous=True)

async def delete_all_contents():
    """Ask Graphlit to delete all contents (synchronous request).

    Does nothing when the Graphlit client is unavailable. Always returns None;
    the service response is discarded.
    """
    client = graphlit.client

    if client is not None:
        await client.delete_all_contents(is_synchronous=True)


Execute Graphlit example

In [26]:
from IPython.display import display, Markdown
import folium
import asyncio

# Start from a clean slate: remove any existing specifications, workflows and
# contents. Only needed so this notebook example can be re-run.
# NOTE: the delete_all_* helpers are not scoped to this notebook's data, so
# run this only against a project whose data may be discarded.
# Each helper is awaited in turn, so cleanup has finished before anything
# below is created or ingested.
await delete_all_specifications()
await delete_all_workflows()
await delete_all_contents()

# Printed after all three deletions above have returned.
print('Deleted all contents.')

# Sample images to ingest; every URI shares the same samples/Images/Chicago
# prefix and differs only in the file name stem.
uris = [
    f"https://graphlitplatform.blob.core.windows.net/samples/Images/Chicago/{stem}.jpg"
    for stem in (
        "20170601_081526",
        "20170601_082315",
        "20170601_213903",
        "20170617_145925",
        "20170816_214947",
        "20170816_214950",
        "20170818_090529",
        "20170907_193136",
        "20170907_194427",
        "20170907_194700",
        "20170907_200452",
        "20170907_200458",
        "20170907_201106",
        "20170907_201322",
        "20170907_201729",
        "20170907_201938",
        "20170907_203103",
        "20170907_204549",
        "20170907_204901",
        "20171006_134259",
    )
]

specification_id = await create_anthropic_specification()

if specification_id is not None:
    print(f'Created specification [{specification_id}].')

    workflow_id = await create_workflow(specification_id)

    if workflow_id is not None:
        print(f'Created workflow [{workflow_id}].')

        # Create a list of tasks for ingesting the URIs
        tasks = [ingest_uri(uri, workflow_id) for uri in uris]

        # Use asyncio.gather() to run all ingestions in parallel
        await asyncio.gather(*tasks)

        print('Ingested all images.')

Deleted all contents.
Created specification [388be350-8f53-433d-b749-658b91e4da79].
Created workflow [f9851273-e55f-405d-b796-78c4193fa1b8].
Ingested all images.


In [28]:
        search_text = input('Search by image description (or hit <enter> to map all images):')

        if search_text == '':
            search_text = None

        map = folium.Map(zoom_start=12, max_zoom=22)

        contents = await query_contents(search_text)

        if contents is not None:
            print(f'Found {len(contents)} contents.')

            marker_coords = []

            for content in contents:
                # NOTE: image location is automatically parsed from image metadata, and address is automatically looked up based on lat/lon
                if content is not None and content.location is not None:
                    if content.location.latitude is not None and content.location.longitude is not None and content.address is not None and content.image is not None and content.image.description is not None:
                        custom_icon = folium.CustomIcon(
                            icon_image=content.image_uri,
                            icon_size=(100, 100),
                            icon_anchor=(50, 50)
                        )

                        popup_content = f'<b>Address:</b> {content.address.street_address}, {content.address.city} {content.address.region}<br/><br/><b>Description:</b> {content.image.description[:3072]}'
                        popup = folium.Popup(popup_content, max_width=400, min_width=300)

                        folium.Marker(
                            location=[content.location.latitude, content.location.longitude],
                            popup=popup,
                            icon=custom_icon
                        ).add_to(map)

                        marker_coords.append([content.location.latitude, content.location.longitude])

            map.fit_bounds(marker_coords)

            display(map)

Search by image description (or hit <enter> to map all images):Wrigley
Found 3 contents.
