In [1]:
import base64
import os
import uuid
import vertexai
from google.adk.agents import LlmAgent
from google.adk.apps import App
from google.adk.artifacts import InMemoryArtifactService
from google.adk.planners import BuiltInPlanner
from google.adk.plugins.save_files_as_artifacts_plugin import SaveFilesAsArtifactsPlugin
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.adk.tools import load_artifacts
from google.genai.types import Part, Content, Blob, ThinkingConfig

[PROJECT_ID] = !gcloud config list --format 'value(core.project)'
LOCATION = 'us-central1'

vertexai.init(project=PROJECT_ID, location=LOCATION)

os.environ['GOOGLE_CLOUD_PROJECT'] = PROJECT_ID
os.environ['GOOGLE_CLOUD_LOCATION'] = LOCATION
os.environ['GOOGLE_GENAI_USE_VERTEXAI'] = 'True'

In [2]:
# Agent that answers questions about image files. It is given the
# load_artifacts tool so it can fetch an uploaded image when the image
# content itself is not present in the conversation context.
root_agent = LlmAgent(
    name='image_analyst_agent',
    model='gemini-2.5-flash',
    instruction='''
Your role is to analyze given image files.
Use load_artifacts() if the image content is not in the context.
''',
    tools=[load_artifacts],
    # Thinking is configured off: zero budget, and thoughts are not included
    # in the responses.
    planner=BuiltInPlanner(
        thinking_config=ThinkingConfig(
            include_thoughts=False,
            thinking_budget=0,
        )
    ),
)

# App wrapper around the agent. With SaveFilesAsArtifactsPlugin installed, the
# session dump later in this notebook shows the uploaded image recorded as an
# '[Uploaded Artifact: "..."]' text part instead of inline image data.
# The app name was previously misspelled as 'iamge_analyzer_app'; it is only
# read back via `app.name`, so correcting it needs no other change.
app = App(
    root_agent=root_agent,
    name='image_analyzer_app',
    plugins=[SaveFilesAsArtifactsPlugin()],
)

In [3]:
# Chat client to test app
class ChatClient:
    """Small test harness that sends messages to an ADK app via a Runner.

    The runner is built with in-memory artifact and session services. One
    session is created on the first call to ``stream`` and reused afterwards,
    so consecutive calls belong to the same conversation.
    """

    def __init__(self, app, user_id='default_user'):
        """Store the app and user id and build the runner for them.

        Args:
            app: The ADK app to run.
            user_id: Identifier used when creating the session and when
                running the agent.
        """
        self._app = app
        self._user_id = user_id
        self._session = None  # created lazily by stream()
        self._runner = Runner(
            app=self._app,
            artifact_service=InMemoryArtifactService(),
            session_service=InMemorySessionService(),
        )

    async def stream(self, content):
        """Send one user message and print every event the runner yields.

        Args:
            content: Either a plain string, which is wrapped into a
                single-part user ``Content``, or an already built message
                that is passed to the runner unchanged.

        Returns:
            A list with one entry per event that carried text; each entry is
            the newline-joined text of that event's parts.
        """
        if not self._session:
            session_service = self._runner.session_service
            self._session = await session_service.create_session(
                app_name=self._app.name,
                user_id=self._user_id,
            )

        if isinstance(content, str):
            text_part = Part.from_text(text=content)
            content = Content(role='user', parts=[text_part])

        separator = '===='
        result = []
        events = self._runner.run_async(
            user_id=self._user_id,
            session_id=self._session.id,
            new_message=content,
        )
        async for event in events:
            # Dump the raw event between separators for inspection.
            print(separator)
            print(event)
            print(separator)

            message = event.content
            if not (message and message.parts):
                continue

            # Keep only the parts that carry text (e.g. skip function calls).
            response = '\n'.join(
                part.text for part in message.parts if part.text
            )
            if response:
                print(response)
                result.append(response)
        return result

In [4]:
def get_image_data(file_path: str) -> str:
    """Read a file in binary mode and return its content as base64 text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str`` as UTF-8.
    """
    with open(file_path, mode='rb') as image_file:
        encoded = base64.b64encode(image_file.read())
    return encoded.decode('utf-8')

client = ChatClient(app)

image_base64 = get_image_data('testimage.png')

parts = [
    Part.from_text(text='Count the number of people in the image.'),
    Part(
        inline_data=Blob(
            mime_type='image/png',
            data=image_base64,
        ),
    ),
]
content = Content(role='user', parts=parts)

_ = await client.stream(content)



====
model_version='gemini-2.5-flash' content=Content(
  parts=[
    Part(
      function_call=FunctionCall(
        args={
          'artifact_names': [
            'artifact_e-64b2ed1d-7960-43ee-967b-c8302232333d_1',
          ]
        },
        id='adk-edc8d038-7c64-4a5a-b63c-56e8d67fb3eb',
        name='load_artifacts'
      )
    ),
  ],
  role='model'
) grounding_metadata=None partial=None turn_complete=None finish_reason=<FinishReason.STOP: 'STOP'> error_code=None error_message=None interrupted=None custom_metadata=None usage_metadata=GenerateContentResponseUsageMetadata(
  candidates_token_count=46,
  candidates_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'>,
      token_count=46
    ),
  ],
  prompt_token_count=262,
  prompt_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'>,
      token_count=262
    ),
  ],
  total_token_count=308,
  traffic_type=<TrafficType.ON_DEMAND: 'ON_DEMAND'>
) live_session_resu

In [5]:
# Follow-up turn on the same client (and therefore the same session). The
# plain string is wrapped into a user Content by ChatClient.stream, so no image
# is attached to this message.
_ = await client.stream('Count the number of dogs in the previous image.')

====
model_version='gemini-2.5-flash' content=Content(
  parts=[
    Part(
      function_call=FunctionCall(
        args={
          'artifact_names': [
            'artifact_e-64b2ed1d-7960-43ee-967b-c8302232333d_1',
          ]
        },
        id='adk-fb383601-bb8c-469c-8501-b84d4f860706',
        name='load_artifacts'
      )
    ),
  ],
  role='model'
) grounding_metadata=None partial=None turn_complete=None finish_reason=<FinishReason.STOP: 'STOP'> error_code=None error_message=None interrupted=None custom_metadata=None usage_metadata=GenerateContentResponseUsageMetadata(
  candidates_token_count=46,
  candidates_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'>,
      token_count=46
    ),
  ],
  prompt_token_count=392,
  prompt_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'>,
      token_count=392
    ),
  ],
  total_token_count=438,
  traffic_type=<TrafficType.ON_DEMAND: 'ON_DEMAND'>
) live_session_resu

In [6]:
session = await client._runner.session_service.get_session(
    app_name = client._app.name,
    user_id = client._user_id,
    session_id = client._session.id
)
for event in session.events:
    print(event)
    print('====')

model_version=None content=Content(
  parts=[
    Part(
      text='Count the number of people in the image.'
    ),
    Part(
      text='[Uploaded Artifact: "artifact_e-64b2ed1d-7960-43ee-967b-c8302232333d_1"]'
    ),
  ],
  role='user'
) grounding_metadata=None partial=None turn_complete=None finish_reason=None error_code=None error_message=None interrupted=None custom_metadata=None usage_metadata=None live_session_resumption_update=None input_transcription=None output_transcription=None avg_logprobs=None logprobs_result=None cache_metadata=None citation_metadata=None interaction_id=None invocation_id='e-64b2ed1d-7960-43ee-967b-c8302232333d' author='user' actions=EventActions(skip_summarization=None, state_delta={}, artifact_delta={}, transfer_to_agent=None, escalate=None, requested_auth_configs={}, requested_tool_confirmations={}, compaction=None, end_of_agent=None, agent_state=None, rewind_before_invocation_id=None) long_running_tool_ids=None branch=None id='9b438416-07bb-4e64-955