Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…tools-python into develop

* 'develop' of https://github.com/awslabs/aws-lambda-powertools-python: (104 commits)
  feat: add metrics metadata (#81)
  chore: cleanup tests (#79)
  chore: remove deprecated code before GA (#78)
  docs: customize contributing guide (#77)
  chore: move blockquotes as hidden comments
  chore: update CHANGELOG
  chore: bump version to 0.11.0 (#76)
  chore: version bump 0.10.1
  fix: default dimension creation now happens when metrics are serialized instead of on metrics constructor (#74)
  fix: default dimension creation now happens when metrics are serialized instead of on metrics constructor (#74)
  docs: fix contrast on highlighted code text (#73)
  feat: improve error handling for log_metrics decorator (#71)
  chore(deps): bump graphql-playground-html from 1.6.19 to 1.6.25 in /docs
  feat: add high level imports (#70)
  fix: correct env var name for publish to pypi test (#69)
  chore: version bump (#68)
  feat: add capture_cold_start_metric for log_metrics (#67)
  chore(deps): bump websocket-extensions from 0.1.3 to 0.1.4 in /docs (#66)
  feat: automate publishing to pypi (#58)
  feat: add pre-commit hooks (#64)
  ...
  • Loading branch information
heitorlessa committed Jun 17, 2020
2 parents efe5e28 + f5aab39 commit a8e8e50
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 22 deletions.
85 changes: 66 additions & 19 deletions aws_lambda_powertools/metrics/base.py
Expand Up @@ -5,7 +5,7 @@
import os
import pathlib
from enum import Enum
from typing import Dict, List, Union
from typing import Any, Dict, List, Union

import fastjsonschema

Expand Down Expand Up @@ -78,14 +78,20 @@ class MetricManager:
"""

def __init__(
    self,
    metric_set: Dict[str, str] = None,
    dimension_set: Dict = None,
    namespace: str = None,
    metadata_set: Dict[str, Any] = None,
    service: str = None,
):
    """Initialize in-memory metric, dimension, and metadata stores.

    Parameters
    ----------
    metric_set : Dict[str, str], optional
        Existing metric set to aggregate into, by default a new empty dict
    dimension_set : Dict, optional
        Existing dimension set to aggregate into, by default a new empty dict
    namespace : str, optional
        Metrics namespace; falls back to POWERTOOLS_METRICS_NAMESPACE env var
    metadata_set : Dict[str, Any], optional
        Existing metadata set to aggregate into, by default a new empty dict
    service : str, optional
        Service dimension value; falls back to POWERTOOLS_SERVICE_NAME env var
    """
    self.metric_set = metric_set if metric_set is not None else {}
    self.dimension_set = dimension_set if dimension_set is not None else {}
    self.namespace = namespace or os.getenv("POWERTOOLS_METRICS_NAMESPACE")
    self.service = service or os.environ.get("POWERTOOLS_SERVICE_NAME")
    self._metric_units = [unit.value for unit in MetricUnit]
    self._metric_unit_options = list(MetricUnit.__members__)
    # Bug fix: previously assigned `self.metadata_set if metadata_set is not None`,
    # which reads an attribute that may not exist yet (AttributeError) and
    # silently discards the caller-supplied metadata_set. Use the argument.
    self.metadata_set = metadata_set if metadata_set is not None else {}

def add_metric(self, name: str, unit: MetricUnit, value: Union[float, int]):
"""Adds given metric
Expand Down Expand Up @@ -131,7 +137,7 @@ def add_metric(self, name: str, unit: MetricUnit, value: Union[float, int]):
# since we could have more than 100 metrics
self.metric_set.clear()

def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None) -> Dict:
def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None, metadata: Dict = None) -> Dict:
"""Serializes metric and dimensions set
Parameters
Expand Down Expand Up @@ -165,39 +171,48 @@ def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None) ->
if dimensions is None: # pragma: no cover
dimensions = self.dimension_set

if metadata is None: # pragma: no cover
metadata = self.metadata_set

if self.service and not self.dimension_set.get("service"):
self.dimension_set["service"] = self.service

logger.debug("Serializing...", {"metrics": metrics, "dimensions": dimensions})

dimension_keys: List[str] = list(dimensions.keys())
metric_names_unit: List[Dict[str, str]] = []
metric_set: Dict[str, str] = {}
metric_names_and_units: List[Dict[str, str]] = [] # [ { "Name": "metric_name", "Unit": "Count" } ]
metric_names_and_values: Dict[str, str] = {} # { "metric_name": 1.0 }

for metric_name in metrics:
metric: str = metrics[metric_name]
metric_value: int = metric.get("Value", 0)
metric_unit: str = metric.get("Unit", "")

metric_names_unit.append({"Name": metric_name, "Unit": metric_unit})
metric_set.update({metric_name: metric_value})

metrics_definition = {
"CloudWatchMetrics": [
{"Namespace": self.namespace, "Dimensions": [dimension_keys], "Metrics": metric_names_unit}
]
metric_names_and_units.append({"Name": metric_name, "Unit": metric_unit})
metric_names_and_values.update({metric_name: metric_value})

embedded_metrics_object = {
"_aws": {
"Timestamp": int(datetime.datetime.now().timestamp() * 1000), # epoch
"CloudWatchMetrics": [
{
"Namespace": self.namespace, # "test_namespace"
"Dimensions": [list(dimensions.keys())], # [ "service" ]
"Metrics": metric_names_and_units,
}
],
},
**dimensions, # "service": "test_service"
**metadata, # "username": "test"
**metric_names_and_values, # "single_metric": 1.0
}
metrics_timestamp = {"Timestamp": int(datetime.datetime.now().timestamp() * 1000)}
metric_set["_aws"] = {**metrics_timestamp, **metrics_definition}
metric_set.update(**dimensions)

try:
logger.debug("Validating serialized metrics against CloudWatch EMF schema", metric_set)
fastjsonschema.validate(definition=CLOUDWATCH_EMF_SCHEMA, data=metric_set)
logger.debug("Validating serialized metrics against CloudWatch EMF schema", embedded_metrics_object)
fastjsonschema.validate(definition=CLOUDWATCH_EMF_SCHEMA, data=embedded_metrics_object)
except fastjsonschema.JsonSchemaException as e:
message = f"Invalid format. Error: {e.message}, Invalid item: {e.name}" # noqa: B306, E501
raise SchemaValidationError(message)
return metric_set
return embedded_metrics_object

def add_dimension(self, name: str, value: str):
"""Adds given dimension to all metrics
Expand Down Expand Up @@ -225,6 +240,38 @@ def add_dimension(self, name: str, value: str):
else:
self.dimension_set[name] = str(value)

def add_metadata(self, key: str, value: Any):
    """Adds high-cardinality metadata to the metrics object.

    Metadata is not available during metrics visualization.
    Instead, it is searchable through CloudWatch Logs.

    If you're looking to add data to filter metrics by,
    use the add_dimension method instead.

    Example
    -------
    **Add metrics metadata**

        metric.add_metadata(key="booking_id", value="booking_id")

    Parameters
    ----------
    key : str
        Metadata key; non-string keys are cast to str per the EMF spec
    value : Any
        Metadata value
    """
    logger.debug(f"Adding metadata: {key}:{value}")

    # Cast key to str according to EMF spec
    # Majority of keys are expected to be string already, so
    # checking before casting improves performance in most cases
    if isinstance(key, str):
        self.metadata_set[key] = value
    else:
        self.metadata_set[str(key)] = value

def __extract_metric_unit_value(self, unit: Union[str, MetricUnit]) -> str:
"""Return metric value from metric unit whether that's str or MetricUnit enum
Expand Down
10 changes: 9 additions & 1 deletion aws_lambda_powertools/metrics/metrics.py
Expand Up @@ -67,20 +67,28 @@ def do_something():

_metrics = {}
_dimensions = {}
_metadata = {}

def __init__(self, service: str = None, namespace: str = None):
    """Initialize Metrics, aliasing the class-level shared stores.

    Parameters
    ----------
    service : str, optional
        Service dimension value
    namespace : str, optional
        Metrics namespace
    """
    # Alias the class-level dicts so all Metrics instances aggregate
    # into the same metric/dimension/metadata sets.
    self.metric_set = self._metrics
    self.dimension_set = self._dimensions
    self.service = service
    self.namespace = namespace
    self.metadata_set = self._metadata

    super().__init__(
        metric_set=self.metric_set,
        dimension_set=self.dimension_set,
        namespace=self.namespace,
        metadata_set=self.metadata_set,
        service=self.service,
    )

def clear_metrics(self):
    """Empty all in-memory metric, dimension, and metadata stores."""
    logger.debug("Clearing out existing metric set from memory")
    for shared_store in (self.metric_set, self.dimension_set, self.metadata_set):
        shared_store.clear()

def log_metrics(
self,
Expand Down
50 changes: 50 additions & 0 deletions docs/content/core/metrics.mdx
Expand Up @@ -89,6 +89,56 @@ with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="
...
```

## Adding metadata

You can use `add_metadata` for advanced use cases, where you want to add metadata as part of the serialized metrics object.

<Note type="info">
<strong>This will not be available during metrics visualization</strong> - Use <strong>dimensions</strong> for this purpose
</Note><br/>

```python:title=app.py
from aws_lambda_powertools import Metrics
from aws_lambda_powertools.metrics import MetricUnit

metrics = Metrics(namespace="ExampleApplication", service="booking")
metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)
metrics.add_metadata(key="booking_id", value="booking_uuid") # highlight-line
```

This will be available in CloudWatch Logs to ease operations on high-cardinality data.

<details>
<summary><strong>Excerpt output in CloudWatch Logs</strong></summary>

```json:title=cloudwatch_logs.json
{
"SuccessfulBooking": 1.0,
"_aws": {
"Timestamp": 1592234975665,
"CloudWatchMetrics": [
{
"Namespace": "ExampleApplication",
"Dimensions": [
[
"service"
]
],
"Metrics": [
{
"Name": "SuccessfulBooking",
"Unit": "Count"
}
]
}
]
},
"service": "booking",
"booking_id": "booking_uuid" // highlight-line
}
```
</details>

## Flushing metrics

As you finish adding all your metrics, you need to serialize and flush them to standard output. You can do that right before you return your response to the caller via `log_metrics`.
Expand Down
101 changes: 99 additions & 2 deletions tests/functional/test_metrics.py
Expand Up @@ -61,12 +61,19 @@ def service() -> str:
return "test_service"


@pytest.fixture
def metadata() -> Dict[str, str]:
    """Keyword arguments for add_metadata calls."""
    return dict(key="username", value="test")


@pytest.fixture
def a_hundred_metrics(namespace=namespace) -> List[Dict[str, str]]:
return [{"name": f"metric_{i}", "unit": "Count", "value": 1} for i in range(100)]


def serialize_metrics(
    metrics: List[Dict], dimensions: List[Dict], namespace: str, metadatas: List[Dict] = None
) -> Dict:
    """Helper function to build EMF object from lists of metrics, dimensions and metadata.

    Returns None for the 100-metric case, which callers serialize separately.
    """
    my_metrics = MetricManager(namespace=namespace)
    for dimension in dimensions:
        my_metrics.add_dimension(**dimension)

    for metric in metrics:
        my_metrics.add_metric(**metric)

    if metadatas is not None:
        for metadata in metadatas:
            my_metrics.add_metadata(**metadata)

    # NOTE(review): 100-metric fixtures are flushed elsewhere — confirm against callers
    if len(metrics) != 100:
        return my_metrics.serialize_metric_set()


def serialize_single_metric(metric: Dict, dimension: Dict, namespace: str, metadata: Dict = None) -> Dict:
    """Helper function to build EMF object from a given metric, dimension, metadata and namespace."""
    my_metrics = MetricManager(namespace=namespace)
    my_metrics.add_metric(**metric)
    my_metrics.add_dimension(**dimension)

    if metadata is not None:
        my_metrics.add_metadata(**metadata)

    return my_metrics.serialize_metric_set()


Expand Down Expand Up @@ -533,3 +548,85 @@ def lambda_handler(evt, ctx):

for metric_record in second_output["_aws"]["CloudWatchMetrics"]:
assert ["service"] in metric_record["Dimensions"]


def test_add_metadata_non_string_dimension_keys(service, metric, namespace):
    # GIVEN Metrics is initialized
    my_metrics = Metrics(service=service, namespace=namespace)
    my_metrics.add_metric(**metric)

    # WHEN we utilize add_metadata with a non-string key
    my_metrics.add_metadata(key=10, value="number_ten")

    # THEN we should have no exceptions
    # and metadata keys should be serialized as strings
    expected_metadata = {"10": "number_ten"}
    assert my_metrics.metadata_set == expected_metadata


def test_add_metadata(service, metric, namespace, metadata):
    # GIVEN Metrics is initialized
    my_metrics = Metrics(service=service, namespace=namespace)
    my_metrics.add_metric(**metric)

    # WHEN we utilize add_metadata with a string key
    my_metrics.add_metadata(**metadata)

    # THEN we should have no exceptions
    # and the metadata set should contain the given key/value pair
    assert my_metrics.metadata_set == {metadata["key"]: metadata["value"]}


def test_log_metrics_with_metadata(capsys, metric, dimension, namespace, service, metadata):
    # GIVEN Metrics is initialized
    my_metrics = Metrics(namespace=namespace)
    my_metrics.add_metric(**metric)
    my_metrics.add_dimension(**dimension)

    # WHEN we utilize log_metrics to serialize and add metadata
    @my_metrics.log_metrics
    def lambda_handler(evt, ctx):
        # removed redundant trailing `pass` (dead statement after add_metadata)
        my_metrics.add_metadata(**metadata)

    lambda_handler({}, {})

    output = capture_metrics_output(capsys)
    expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace, metadata=metadata)

    # THEN the flushed EMF object should match a manually serialized one, including metadata
    remove_timestamp(metrics=[output, expected])
    assert expected == output


def test_serialize_metric_set_metric_definition(metric, dimension, namespace, service, metadata):
    # GIVEN Metrics is initialized with one metric, one dimension and one metadata entry
    my_metrics = Metrics(service=service, namespace=namespace)
    my_metrics.add_metric(**metric)
    my_metrics.add_dimension(**dimension)
    my_metrics.add_metadata(**metadata)

    # Expected EMF blob: metric values, _aws definition, dimensions and metadata flattened
    expected_metric_definition = {
        "single_metric": 1.0,
        "_aws": {
            "Timestamp": 1592237875494,
            "CloudWatchMetrics": [
                {
                    "Namespace": "test_namespace",
                    "Dimensions": [["test_dimension", "service"]],
                    "Metrics": [{"Name": "single_metric", "Unit": "Count"}],
                }
            ],
        },
        "service": "test_service",
        "username": "test",
        "test_dimension": "test",
    }

    # WHEN metrics are serialized manually
    metric_definition_output = my_metrics.serialize_metric_set()

    # THEN we should emit a valid embedded metric definition object
    assert "Timestamp" in metric_definition_output["_aws"]
    remove_timestamp(metrics=[metric_definition_output, expected_metric_definition])
    assert metric_definition_output == expected_metric_definition

0 comments on commit a8e8e50

Please sign in to comment.