# MLflow Strands Tracing with AWS Bedrock

This notebook demonstrates how to use MLflow's autologging with Strands agents using AWS Bedrock models.

In [1]:
from strands import Agent
from strands.models import BedrockModel
from strands_tools import calculator

import mlflow

In [2]:
# Tracking configuration, kept in one place so it is easy to retarget.
# The host name "mlflow" is presumably the Docker service running the
# MLflow tracking server — adjust if the server lives elsewhere.
TRACKING_URI = "http://mlflow:5000"
EXPERIMENT_NAME = "strand-agent-bedrock"

# Turn on MLflow autologging for Strands agents.
mlflow.strands.autolog()

# Send tracking data to the MLflow server, then select the experiment.
# set_experiment is deliberately the last expression: the cell displays the
# returned Experiment object (it is created on first use, see the log line
# in the output).
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

2025/12/06 14:05:49 INFO mlflow.tracking.fluent: Experiment with name 'strand-agent-bedrock' does not exist. Creating a new experiment.


<Experiment: artifact_location='/app/mlruns/1', creation_time=1765029949379, experiment_id='1', last_update_time=1765029949379, lifecycle_stage='active', name='strand-agent-bedrock', tags={}>

In [3]:
# Settings for the Bedrock-hosted model used by the agent.
# No credentials appear here: AWS credentials will be automatically detected
# from the environment / AWS CLI configuration.
bedrock_settings = {
    "model_id": "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
    "max_tokens": 2000,
    "temperature": 0.0,
}

# Build the model wrapper from the settings above.
model = BedrockModel(**bedrock_settings)

In [4]:
# Instructions given to the agent as its system prompt.
SYSTEM_PROMPT = "You are a helpful mathematical assistant. Always show your work when performing calculations."

# Assemble the agent: the Bedrock model configured above, the calculator tool
# imported from strands_tools, and the custom system prompt.
agent = Agent(model=model, tools=[calculator], system_prompt=SYSTEM_PROMPT)

In [5]:
%%time
# Test the agent with a simple calculation
response = agent("What is 2+2")


Tool #1: calculator


The answer is **4**.

This is a simple addition problem: 2 + 2 = 4CPU times: user 206 ms, sys: 28.3 ms, total: 235 ms
Wall time: 5.78 s


In [6]:
# Second call: send the *same* question again to the same agent instance.
# `response` is overwritten with this second result, which is the one the
# following cells inspect.
repeated_question = "What is 2+2"
response = agent(repeated_question)


Tool #2: calculator


The answer is **4**.

This is a basic addition: 2 + 2 = 4

In [7]:
# Show the AgentResult returned by the most recent agent call (a bare last
# expression renders the object's repr as the cell output).
# NOTE(review): in the recorded output the metrics span both calls
# (cycle_count=4, calculator call_count=2), so they look cumulative for the
# agent rather than specific to the last call — confirm.
response

AgentResult(stop_reason='end_turn', message={'role': 'assistant', 'content': [{'text': 'The answer is **4**.\n\nThis is a basic addition: 2 + 2 = 4'}]}, metrics=EventLoopMetrics(cycle_count=4, tool_metrics={'calculator': ToolMetrics(tool={'toolUseId': 'tooluse_5y2eslpJS4yu_URqcpRDVw', 'name': 'calculator', 'input': {'expression': '2+2'}}, call_count=2, success_count=2, error_count=0, total_time=0.01042628288269043)}, cycle_durations=[3.1196177005767822, 2.3887977600097656], traces=[<strands.telemetry.metrics.Trace object at 0x74e836229190>, <strands.telemetry.metrics.Trace object at 0x74e8364a5130>, <strands.telemetry.metrics.Trace object at 0x74e83d3f9190>, <strands.telemetry.metrics.Trace object at 0x74e8361930e0>], accumulated_usage={'inputTokens': 7192, 'outputTokens': 159, 'totalTokens': 7351}, accumulated_metrics={'latencyMs': 9553}), state={}, interrupts=None, structured_output=None)

In [8]:
# Fetch the trace recorded for the most recent agent call.
#
# Both lookups can legitimately come back empty: get_last_active_trace_id()
# returns None when no trace has been recorded in this process, and
# get_trace() returns None when the trace cannot be found on the tracking
# server. Fail here with a clear message instead of letting the next cell
# crash with "'NoneType' object has no attribute 'info'".
last_trace_id = mlflow.get_last_active_trace_id()
if last_trace_id is None:
    raise RuntimeError(
        "No active trace found; run the agent with autologging enabled before this cell."
    )

trace = mlflow.get_trace(trace_id=last_trace_id)
if trace is None:
    raise RuntimeError(
        f"Trace {last_trace_id!r} could not be loaded from the tracking server."
    )

In [9]:
# Print the aggregated token usage stored on the trace.
# trace.info.token_usage is None when no token usage was recorded for the
# trace, so report that case explicitly instead of failing on the subscript.
total_usage = trace.info.token_usage
print("== Total token usage: ==")
if total_usage is None:
    print("  (no token usage recorded for this trace)")
else:
    print(f"  Input tokens: {total_usage['input_tokens']}")
    print(f"  Output tokens: {total_usage['output_tokens']}")
    print(f"  Total tokens: {total_usage['total_tokens']}")

== Total token usage: ==
  Input tokens: 3702
  Output tokens: 79
  Total tokens: 3781


In [10]:
# Walk every span in the trace and report token usage wherever a span carries
# the "mlflow.chat.tokenUsage" attribute. As the recorded output shows, this
# covers the individual `chat` spans and also the enclosing
# `invoke_agent Strands Agents` span.
# NOTE(review): the agent span's figures in the recorded output (7192 input
# tokens) match the AgentResult's accumulated usage, not the sum of this
# trace's chat spans — it appears to be cumulative across calls; confirm.
print("\n== Detailed usage for each LLM call: ==")
for span in trace.data.spans:
    usage = span.get_attribute("mlflow.chat.tokenUsage")
    if not usage:
        # This span recorded no token usage; nothing to print.
        continue
    print(f"{span.name}:")
    print(f"  Input tokens: {usage['input_tokens']}")
    print(f"  Output tokens: {usage['output_tokens']}")
    print(f"  Total tokens: {usage['total_tokens']}")


== Detailed usage for each LLM call: ==
invoke_agent Strands Agents:
  Input tokens: 7192
  Output tokens: 159
  Total tokens: 7351
chat:
  Input tokens: 1816
  Output tokens: 53
  Total tokens: 1869
chat:
  Input tokens: 1886
  Output tokens: 26
  Total tokens: 1912
