# 🎯 Multi-Agent Multimodal Analysis


In [None]:
import boto3
from strands.models import BedrockModel
from strands import Agent
from strands_tools import image_reader, file_read
from video_reader import video_reader
from strands.tools import tool


# Basic agent built with only a tool list (no explicit model or system prompt).
# video_reader is the tool imported from the video_reader module above.
agent = Agent(
    tools=[
        image_reader,
        file_read,
        video_reader,
    ],
)

## 🤖 Agent Configuration


In [None]:
# System prompt for the multimodal agent.
# It tells the model which tool to use per file type (image_reader for images,
# file_read for documents, video_reader for videos) and how to format replies.
# NOTE(review): the "Always reply in the original user language" rule appears
# twice in the prompt (as a bullet and as a closing line) - presumably for
# emphasis; confirm before deduplicating, since the text is sent to the model.
MULTIMODAL_SYSTEM_PROMPT = """ You are a helpful assistant that can process documents, images, and videos. 
Analyze their contents and provide relevant information.

You can:

1. For PNG, JPEG/JPG, GIF, or WebP formats use image_reader to process file
2. For PDF, csv, docx, xls or xlsx formats use file_read to process file  
3. For MP4, MOV, AVI, MKV, WebM formats use video_reader to process file
4. Just deliver the answer

When displaying responses:
- Format answers data in a human-readable way
- Highlight important information
- Handle errors appropriately
- Convert technical terms to user-friendly language
- Always reply in the original user language

Always reply in the original user language.
"""


# video_reader is already imported at the top of the notebook, so it does not need to be loaded again.
# Loading it with load_tool(path="tools/video_reader.py", name="video_reader") caused errors.

# AWS session pinned to the region used for the Bedrock calls below.
session = boto3.Session(region_name="us-west-2")

# Bedrock-backed model wrapper; responses are requested with streaming=False.
bedrock_model = BedrockModel(
    boto_session=session,
    streaming=False,
    # Alternative model to try: "us.amazon.nova-pro-v1:0"
    model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
)

# Multimodal agent: same tool set as above (image, document and video readers),
# plus the system prompt and the explicitly configured Bedrock model.
multimodal_agent = Agent(
    model=bedrock_model,
    tools=[image_reader, file_read, video_reader],
    system_prompt=MULTIMODAL_SYSTEM_PROMPT,
)

## 🎯 Usage Examples



In [None]:
# Example 1: ask the agent to analyze an image referenced by path in the prompt.
print("=== 📸 IMAGE ANALYSIS ===")
image_result = multimodal_agent(
    "Analyze the image data-sample/diagram.jpg in detail and describe everything you observe"
)
print(image_result)
# Visual separator: blank line, 80 "=" characters, blank line.
print(f"\n{'=' * 80}\n")

In [None]:
# Show only the text of the first content item of the agent's final message.
image_result.message["content"][0]["text"]

In [None]:
# Example 2: ask the agent to analyze a video referenced by path in the prompt.
print("=== 🎬 VIDEO ANALYSIS ===")
video_result = multimodal_agent(
    "Analyze the video data-sample/moderation-video.mp4 and describe in detail the actions and scenes you observe"
)
print(video_result)
# Visual separator: blank line, 80 "=" characters, blank line.
print(f"\n{'=' * 80}\n")

In [None]:
# Show only the text of the first content item of the agent's final message.
video_result.message["content"][0]["text"]

In [None]:
# Example 3: ask the agent for a JSON summary of a PDF document
# (requires the PDF to be present at the path given in the prompt).
print("=== 📄 DOCUMENT ANALYSIS ===")
doc_result = multimodal_agent(
    "Summarize as json the content of the document data-sample/Welcome-Strands-Agents-SDK.pdf"
)
print(doc_result)


In [None]:
# Show only the text of the first content item of the agent's final message.
doc_result.message["content"][0]["text"]

In [None]:
# Example 4: invoke the video_reader tool directly through multimodal_agent.tool
# instead of sending a natural-language prompt to the agent.
# NOTE(review): "video.mp4" differs from the data-sample/ paths used in the
# other examples - confirm the file exists at this location.
video_analysis = multimodal_agent.tool.video_reader(
    video_path="video.mp4",
    text_prompt="What are the main elements in this video?",
)

In [None]:
# A direct tool call (agent.tool.<name>) returns the tool's result dict rather
# than an AgentResult, so there is no .message attribute; the text is read from
# the dict's "content" list directly.
video_analysis["content"][0]["text"]