# CloudWatch Metrics for Amazon Bedrock
**Last Updated:** Oct 14th 2024

**Modified by:** Chae Clark

This notebook shows example usage of Amazon CloudWatch as a store of metrics for Amazon Bedrock usage. You can use this to log custom metrics related to your Bedrock usage, such as latency, token usage, and retry requests.

In [1]:
import time
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any, Dict, List, Optional, Tuple

import boto3
import numpy as np

## Methods
The three methods:
1. query_endpoint - calls the bedrock service. This is just to create real metadata
2. set_bedrock_metadata_in_cloudwatch - puts the metadata metrics into CloudWatch
3. get_bedrock_metadata_from_cloudwatch - gets the selected metrics and dimensions from CloudWatch

**NOTE:** It may take a few minutes for the CloudWatch metrics to populate. 

In [2]:
def query_endpoint(
    prompt: List[str],
    history: List[Dict[str, str]],
    model: Dict[str, str],
    system: Optional[str] = None,
    client: Optional[Any] = None,
    max_tokens: int = 300,
    temperature: float = 0.01,
    top_p: float = 0.99,
    region: str = 'us-east-1'
) -> Tuple[str, int, int, int, Dict[str, Any]]:
    """
    Query the Bedrock endpoint with the given prompt and history.

    The conversation sent to ``client.converse`` is the turns from
    ``history`` (each expanded to a user message followed by an assistant
    message) followed by the messages in ``prompt``.

    Args:
        prompt (List[str]): List of alternating user and assistant messages,
            starting with a user message.
        history (List[Dict[str, str]]): Previous conversation turns. Each
            entry must have a 'prompt' key (user text) and a 'response' key
            (assistant text).
        model (Dict[str, str]): Model configuration; the 'endpoint' key is
            used as the Bedrock model ID.
        system (Optional[str]): System message for the conversation. Omitted
            from the request when empty or None.
        client (Optional[Any]): Bedrock runtime client exposing ``converse``.
            When None, one is created with ``boto3.client`` for ``region``.
        max_tokens (int): Maximum number of tokens in the response.
        temperature (float): Sampling temperature for response generation.
        top_p (float): Top-p sampling parameter.
        region (str): AWS region used only when a client has to be created.

    Returns:
        Tuple[str, int, int, int, Dict[str, Any]]: Generated text, input
        tokens, output tokens, latency in milliseconds, and the full response
        dict returned by ``converse``.
    """
    if client is None:
        client = boto3.client('bedrock-runtime', region_name=region)

    # Earlier turns come first so the model sees them as context.
    messages = []
    for turn in history:
        messages.append({'role': 'user', 'content': [{'text': turn['prompt']}]})
        messages.append({'role': 'assistant', 'content': [{'text': turn['response']}]})

    # Append (not overwrite) the current prompt; even positions are user
    # messages, odd positions are assistant messages.
    messages.extend(
        {'role': 'user' if i % 2 == 0 else 'assistant', 'content': [{'text': text}]}
        for i, text in enumerate(prompt)
    )

    inference_config = {
        'maxTokens': max_tokens,
        'temperature': temperature,
        'topP': top_p,
    }

    kwargs = {
        'modelId': model['endpoint'],
        'messages': messages,
        'inferenceConfig': inference_config
    }

    if system:
        kwargs['system'] = [{'text': system}]

    response = client.converse(**kwargs)

    generated_text = response['output']['message']['content'][0]['text']
    usage = response['usage']
    tokens_in = usage['inputTokens']
    tokens_out = usage['outputTokens']
    latency = response['metrics']['latencyMs']

    return generated_text, tokens_in, tokens_out, latency, response

In [3]:
def set_bedrock_metadata_in_cloudwatch(
    js: Dict,
    metric_namespace: str,
    username: str = 'NA',
    department: str = 'NA',
    client: Optional[Any] = None,
    region: str = 'us-east-1',
    debug: bool = False
) -> None:
    """
    Publish latency and token-usage metrics for one Bedrock call to CloudWatch.

    Three metrics are written in a single ``put_metric_data`` request:
    'Latency' (Milliseconds), 'InputTokens' (Count) and 'OutputTokens'
    (Count). All three share one timestamp and the same dimensions
    (BedrockCall='ConverseAPI', User, Department).

    Args:
        js (Dict): Response dict from the Bedrock call. Must contain
            js['metrics']['latencyMs'], js['usage']['inputTokens'] and
            js['usage']['outputTokens'].
        metric_namespace (str): CloudWatch namespace to write into.
        username (str): Value for the 'User' dimension.
        department (str): Value for the 'Department' dimension.
        client (Optional[Any]): CloudWatch client exposing
            ``put_metric_data``. When None, one is created with
            ``boto3.client`` for ``region``.
        region (str): AWS region used only when a client has to be created.
        debug (bool): When True, print the ``put_metric_data`` response.

    Raises:
        KeyError: If ``js`` lacks one of the required keys.
    """
    if client is None:
        client = boto3.client('cloudwatch', region_name=region)

    dimensions = [
        {'Name': 'BedrockCall', 'Value': 'ConverseAPI'},
        {'Name': 'User', 'Value': username},
        {'Name': 'Department', 'Value': department}
    ]

    # One timezone-aware timestamp for the whole batch, so the three metrics
    # of a single call line up exactly instead of drifting by microseconds.
    timestamp = datetime.now(timezone.utc)

    usage = js['usage']
    samples = [
        ('Latency', js['metrics']['latencyMs'], 'Milliseconds'),
        ('InputTokens', usage['inputTokens'], 'Count'),
        ('OutputTokens', usage['outputTokens'], 'Count'),
    ]

    metric_data = [
        {
            'MetricName': name,
            'Value': int(value),
            'Unit': unit,
            'Timestamp': timestamp,
            'Dimensions': dimensions
        }
        for name, value, unit in samples
    ]

    # Put the metric data
    response = client.put_metric_data(
        Namespace=metric_namespace,
        MetricData=metric_data
    )

    if debug:
        print(f"Metric data stored in CloudWatch: {response}")

In [4]:
def get_bedrock_metadata_from_cloudwatch(
    metric_namespace: str,
    metrics: List[str],
    start_time,
    end_time,
    username: Optional[str] = None,
    department: Optional[str] = None,
    client: Optional[Any] = None,
    region: str = 'us-east-1',
    debug: bool = False,
    period: int = 600,
    statistics: Optional[List[str]] = None
) -> List[Dict[str, Any]]:
    """
    Fetch statistics for the given metrics from CloudWatch.

    One ``get_metric_statistics`` request is issued per metric name. Every
    request filters on the dimension BedrockCall='ConverseAPI'; the 'User'
    and 'Department' dimensions are added only when the corresponding
    argument is not None.

    Args:
        metric_namespace (str): CloudWatch namespace to read from.
        metrics (List[str]): Metric names to query, e.g. 'Latency'.
        start_time: Start of the query window, passed through as StartTime.
        end_time: End of the query window, passed through as EndTime.
        username (Optional[str]): Restrict results to this 'User' dimension.
        department (Optional[str]): Restrict results to this 'Department'
            dimension.
        client (Optional[Any]): CloudWatch client exposing
            ``get_metric_statistics``. When None, one is created with
            ``boto3.client`` for ``region``.
        region (str): AWS region used only when a client has to be created.
        debug (bool): When True, print the datapoints of every queried metric.
        period (int): Aggregation period in seconds (default 600, i.e.
            10-minute periods).
        statistics (Optional[List[str]]): Statistics to request. Defaults to
            ['Average', 'Maximum', 'Minimum'] when None.

    Returns:
        List[Dict[str, Any]]: The raw ``get_metric_statistics`` responses, in
        the same order as ``metrics``. Empty when ``metrics`` is empty.
    """
    if client is None:
        client = boto3.client('cloudwatch', region_name=region)

    if statistics is None:
        statistics = ['Average', 'Maximum', 'Minimum']

    dimensions = [{'Name': 'BedrockCall', 'Value': 'ConverseAPI'}]
    if username is not None:
        dimensions.append({'Name': 'User', 'Value': username})
    if department is not None:
        dimensions.append({'Name': 'Department', 'Value': department})

    response = [
        client.get_metric_statistics(
            Namespace=metric_namespace,
            MetricName=metric_name,
            Dimensions=dimensions,
            StartTime=start_time,
            EndTime=end_time,
            Period=period,
            Statistics=statistics
        )
        for metric_name in metrics
    ]

    if debug:
        # Iterate instead of indexing response[0] so an empty `metrics` list
        # does not raise, and every queried metric is shown.
        for metric_name, result in zip(metrics, response):
            print(f"{metric_name}: {result['Datapoints']}")
    return response

## Sample Data
This generates sample data by calling the Bedrock service and returning the result along with the metadata that we can place into CloudWatch. The model used here doesn't change any of the following code, as we are leveraging the Converse API, which standardizes the call and response across models.

In [5]:
# AWS region in which the Bedrock runtime call below is made.
region = 'us-east-1'

# Selectable models: each entry carries a display name, the Bedrock model ID
# ('endpoint') and its source, which is 'bedrock' for all of them.
models_list = [
    {'name': display_name, 'endpoint': model_id, 'source': 'bedrock'}
    for display_name, model_id in (
        ('Claude 3 Haiku', 'anthropic.claude-3-haiku-20240307-v1:0'),
        ('Claude 3 Sonnet', 'anthropic.claude-3-sonnet-20240229-v1:0'),
        ('Claude 3.5 Sonnet', 'anthropic.claude-3-5-sonnet-20240620-v1:0'),
        ('LLAMA 3 70B', 'meta.llama3-70b-instruct-v1:0'),
        ('LLAMA 3 8B', 'meta.llama3-8b-instruct-v1:0'),
        ('Mistral Large', 'mistral.mistral-large-2402-v1:0'),
    )
]

# Single-message request with no history; index 4 selects 'LLAMA 3 8B'.
generated_text, tokens_in, tokens_out, latency, response = query_endpoint(
    prompt=['Hello'],
    history=[],
    model=models_list[4],
    max_tokens=300,
    temperature=0.99,
    top_p=0.99,
    region=region,
)
# Bare expression so the notebook displays the raw response dict.
response

{'ResponseMetadata': {'RequestId': '1e561d50-88db-4fa0-a74c-dee25c358c6b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Mon, 14 Oct 2024 15:51:08 GMT',
   'content-type': 'application/json',
   'content-length': '281',
   'connection': 'keep-alive',
   'x-amzn-requestid': '1e561d50-88db-4fa0-a74c-dee25c358c6b'},
  'RetryAttempts': 0},
 'output': {'message': {'role': 'assistant',
   'content': [{'text': "\n\nHello! It's nice to meet you. Is there something I can help you with, or would you like to chat?"}]}},
 'stopReason': 'end_turn',
 'usage': {'inputTokens': 15, 'outputTokens': 27, 'totalTokens': 42},
 'metrics': {'latencyMs': 426}}

## Store Bedrock Metadata in CloudWatch
This loops through a number of calls and stores the data for fictional users and departments into CloudWatch.

In [6]:
# Fictional users and departments used as CloudWatch dimension values.
usernames = ['arnold.kelsey', 'admin.it', 'jessica.wolsley']
departments = ['research', 'hr', 'platforms-div-2']

# Store the same Bedrock response 500 times, each under a randomly picked
# user and department, pausing briefly between writes.
for _ in range(500):
    set_bedrock_metadata_in_cloudwatch(
        response,
        metric_namespace='BedrockUsage',
        username=np.random.choice(usernames, size=1)[0],
        department=np.random.choice(departments, size=1)[0],
    )
    time.sleep(0.05)

## Retrieve Metrics from CloudWatch
This retrieves statistics for the metrics stored in CloudWatch.

In [7]:
# Timezone-aware UTC window; datetime.utcnow() is deprecated and returns a
# naive value.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(hours=10)  # Get data for the last 10 hours

metrics = get_bedrock_metadata_from_cloudwatch('BedrockUsage', ['OutputTokens', 'Latency'], start_time, end_time, 'arnold.kelsey', 'research')

print("Metrics:")
for metric in metrics:
    # CloudWatch does not return datapoints in chronological order, so sort
    # them by timestamp before printing.
    datapoints = sorted(metric['Datapoints'], key=lambda dp: dp['Timestamp'])
    for datapoint in datapoints:
        print(f"Timestamp: {datapoint['Timestamp']}, Average: {datapoint['Average']}")

Metrics:
Timestamp: 2024-10-14 15:21:00+00:00, Average: 12.0
Timestamp: 2024-10-14 15:31:00+00:00, Average: 12.0
Timestamp: 2024-10-14 15:31:00+00:00, Average: 250.0
Timestamp: 2024-10-14 15:21:00+00:00, Average: 250.0
