In [1]:
import base64
import os
import uuid
import vertexai
from google.adk.agents import LlmAgent
from google.adk.apps import App
from google.adk.artifacts import InMemoryArtifactService
from google.adk.plugins.save_files_as_artifacts_plugin import SaveFilesAsArtifactsPlugin
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.adk.tools import load_artifacts
from google.genai.types import Part, Content, Blob

# Read the active project id from the local gcloud configuration. The IPython
# `!` capture returns the command's output lines as a list, and the one-element
# list pattern on the left unpacks it: any number of output lines other than
# exactly one raises ValueError here.
[PROJECT_ID] = !gcloud config list --format 'value(core.project)'
LOCATION = 'us-central1'

# Initialise the Vertex AI SDK with the resolved project and region.
vertexai.init(project=PROJECT_ID, location=LOCATION)

# Publish the same settings as environment variables.
# NOTE(review): presumably these are read by the ADK / google-genai client to
# route model calls through Vertex AI -- confirm against the SDK docs.
os.environ['GOOGLE_CLOUD_PROJECT'] = PROJECT_ID
os.environ['GOOGLE_CLOUD_LOCATION'] = LOCATION
os.environ['GOOGLE_GENAI_USE_VERTEXAI'] = 'True'

In [2]:
# Chat client to test app
class ChatClient:
    """Small notebook helper that drives an app through a ``Runner``.

    The runner is built with in-memory session and artifact services. One
    session is created on first use and reused afterwards, so consecutive
    ``stream`` calls on the same client share that session.
    """

    def __init__(self, app, user_id='default_user'):
        """Store the app and user id and build the runner.

        Args:
            app: The app to run; its ``name`` is used when the session is
                created.
            user_id: Identifier passed to session creation and to every run.
        """
        self._app = app
        self._user_id = user_id
        self._runner = Runner(
            app=self._app,
            artifact_service=InMemoryArtifactService(),
            session_service=InMemorySessionService(),
        )
        # No session yet; stream() creates it on the first call.
        self._session = None

    async def stream(self, content):
        """Send one message and collect the text carried by the resulting events.

        Every event is printed between ``====`` separators, followed by its
        joined text when it has any.

        Args:
            content: A plain string (wrapped into a user-role text message) or
                a ready-made message object handed to the runner unchanged.

        Returns:
            A list with one string per event that carried text; the text parts
            of a single event are joined with newlines.
        """
        if not self._session:
            sessions = self._runner.session_service
            self._session = await sessions.create_session(
                app_name=self._app.name,
                user_id=self._user_id,
            )

        if isinstance(content, str):
            text_part = Part.from_text(text=content)
            content = Content(role='user', parts=[text_part])

        event_stream = self._runner.run_async(
            user_id=self._user_id,
            session_id=self._session.id,
            new_message=content,
        )

        collected = []
        async for event in event_stream:
            # Dump the raw event for inspection.
            print('====', event, '====', sep='\n')

            message = event.content
            if not (message and message.parts):
                continue

            # Only parts with non-empty text contribute to the response.
            text = '\n'.join(part.text for part in message.parts if part.text)
            if text:
                print(text)
                collected.append(text)
        return collected

In [3]:
# Agent that analyses user-supplied images. The load_artifacts tool is
# registered so the model can request an image by artifact name when the image
# content is not already in its context, as the instruction tells it to.
root_agent = LlmAgent(
    name='image_analyst_agent',
    model='gemini-2.5-flash',
    instruction='''
Your role is to analyze given image files.
Use load_artifacts() if the image content is not in the context.
''',
    tools=[load_artifacts]
)

# App wrapper around the agent. The app name is the identifier the chat client
# passes as `app_name` when it creates a session, so it is spelled out
# correctly here.
# NOTE(review): SaveFilesAsArtifactsPlugin presumably stores files attached to
# user messages as artifacts that load_artifacts can later retrieve -- confirm
# against the ADK plugin documentation.
app = App(
    root_agent=root_agent,
    name='image_analyzer_app',
    plugins=[SaveFilesAsArtifactsPlugin()],
)

In [4]:
def get_image_data(file_path: str):
    """Read a file in binary mode and return its contents as base64 text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's bytes, base64-encoded and decoded as UTF-8 into a ``str``.
    """
    with open(file_path, mode='rb') as image_file:
        encoded = base64.b64encode(image_file.read())
    return str(encoded, 'utf-8')

In [5]:
client = ChatClient(app)

image_base64 = get_image_data('testimage.png')

parts = [
    Part.from_text(text='describe the image'),
    Part(
        inline_data=Blob(
            mime_type='image/png',
            data=image_base64,
        ),
    ),
]
content = Content(role='user', parts=parts)

_ = await client.stream(content)



====
model_version='gemini-2.5-flash' content=Content(
  parts=[
    Part(
      function_call=FunctionCall(
        args={
          'artifact_names': [
            'artifact_e-03b725ad-9a31-47ba-9caf-5da01e394523_1',
          ]
        },
        id='adk-a105b53f-95fd-4500-9ea3-cb01686af578',
        name='load_artifacts'
      ),
      thought_signature=b'\n\xf7\x01\x01\x8f=k_\xcb\xef\xeaGp<N\xb5\xb6;x-Q3]\xe0\xb8]\xd4f\xde\x87+X\x15u>\xd1\x13=^W\xc4\xd2[\xb1\x82G\x88\x17\x94\xcaY\xb6{\xa1*\x91#\x8d \xf3R\x8e\xac\x80\xfe\x0e\x11\xa3\x1f\xff\xfc\xab\x001Ez;\x85\x84i\x10\xd3C\x8bO#\n\x8f\xfd\xad\x0b\xb7\x9f\xee\r\x8b\xd3...'
    ),
  ],
  role='model'
) grounding_metadata=None partial=None turn_complete=None finish_reason=<FinishReason.STOP: 'STOP'> error_code=None error_message=None interrupted=None custom_metadata=None usage_metadata=GenerateContentResponseUsageMetadata(
  candidates_token_count=43,
  candidates_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality

In [6]:
# Second turn: `client` is reused, so this message goes to the session created
# by the first turn. The agent answers from the image content already in the
# session context; the event printed below is a plain text reply with no
# load_artifacts call.
_ = await client.stream('The number of people in the image?')

====
model_version='gemini-2.5-flash' content=Content(
  parts=[
    Part(
      text='There are three people in the image: a father and two children.',
      thought_signature=b'\n\xa8\x02\x01\x8f=k_~]\x8e\xdf\xc86\x12\x03\x85\xbf}\x17#\x8eR^\xc6\xc1\xe0\x99X\x89\x1ct\xbf\xbc\x8d\x10\x92\xc9\xcdJE\xdd\x16\xa9?.\xae\x08\x96_\x8e\x01\xb9\xff\x0f\x99\xaf\xadO\x8f\x96>\x864LN\xe4k\xa6\xe6\x03y\xc1\xa1\x10\xf1ht\xc8[\xdd1^om\x0f\xa4~\xfd\xb2\xd6{\x94x\xf2\xeb\x0f...'
    ),
  ],
  role='model'
) grounding_metadata=None partial=None turn_complete=None finish_reason=<FinishReason.STOP: 'STOP'> error_code=None error_message=None interrupted=None custom_metadata=None usage_metadata=GenerateContentResponseUsageMetadata(
  candidates_token_count=14,
  candidates_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'>,
      token_count=14
    ),
  ],
  prompt_token_count=500,
  prompt_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'