Telemetry metrics #4414

Merged · 16 commits · Feb 14, 2024
2 changes: 1 addition & 1 deletion .gitignore
@@ -34,4 +34,4 @@ env/
**/_repack_script_launcher.sh
tests/data/**/_repack_model.py
tests/data/experiment/sagemaker-dev-1.0.tar.gz
src/sagemaker/serve/tmp_workspace
src/sagemaker/serve/tmp_workspace
23 changes: 21 additions & 2 deletions src/sagemaker/serve/utils/telemetry_logger.py
@@ -13,12 +13,15 @@
"""Placeholder docstring"""
from __future__ import absolute_import
import logging
from time import perf_counter

import requests

from sagemaker import Session
from sagemaker import Session, exceptions
from sagemaker.serve.mode.function_pointers import Mode
from sagemaker.serve.utils.exceptions import ModelBuilderException
from sagemaker.serve.utils.types import ModelServer
from sagemaker.user_agent import SDK_VERSION

logger = logging.getLogger(__name__)

@@ -63,13 +66,21 @@ def wrapper(self, *args, **kwargs):
f"{func_name}"
f"&x-modelServer={MODEL_SERVER_TO_CODE[str(self.model_server)]}"
f"&x-imageTag={image_uri_tail}"
f"&x-sdkVersion={SDK_VERSION}"
)

if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI:
extra += f"&x-modelName={self.model}"

if self.sagemaker_session and self.sagemaker_session.endpoint_arn:
extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}"

start_timer = perf_counter()
try:
response = func(self, *args, **kwargs)
stop_timer = perf_counter()
elapsed = stop_timer - start_timer
extra += f"&x-latency={round(elapsed, 2)}"
if not self.serve_settings.telemetry_opt_out:
_send_telemetry(
"1",
@@ -79,7 +90,15 @@ def wrapper(self, *args, **kwargs):
None,
extra,
)
except ModelBuilderException as e:
except (
ModelBuilderException,
exceptions.CapacityError,
exceptions.UnexpectedStatusException,
exceptions.AsyncInferenceError,
) as e:
stop_timer = perf_counter()
elapsed = stop_timer - start_timer
extra += f"&x-latency={round(elapsed, 2)}"
if not self.serve_settings.telemetry_opt_out:
_send_telemetry(
"0",
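For readers skimming the diff, here is a minimal, self-contained sketch of the decorator pattern this hunk implements: time the wrapped call with perf_counter, append the SDK version, endpoint ARN, and measured latency to the telemetry query string, and emit a success ("1") or failure ("0") record. Everything except perf_counter is a simplified stand-in, not the real sagemaker.serve API.

# Hedged sketch only; _send_telemetry_sketch and the field names mirror the diff
# above but are not the actual implementation.
from functools import wraps
from time import perf_counter

def _send_telemetry_sketch(status, extra):
    print(f"telemetry status={status} extra={extra}")  # stand-in for the real HTTP call

def capture_telemetry_sketch(func_name):
    def decorator(func):
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            extra = f"{func_name}&x-sdkVersion=2.x.x"  # real code interpolates SDK_VERSION
            session = getattr(self, "sagemaker_session", None)
            if session is not None and getattr(session, "endpoint_arn", None):
                extra += f"&x-endpointArn={session.endpoint_arn}"
            start = perf_counter()
            try:
                result = func(self, *args, **kwargs)
                extra += f"&x-latency={round(perf_counter() - start, 2)}"
                _send_telemetry_sketch("1", extra)  # success path
                return result
            except Exception:  # the real code catches specific SDK exceptions
                extra += f"&x-latency={round(perf_counter() - start, 2)}"
                _send_telemetry_sketch("0", extra)  # failure path
                raise
        return wrapper
    return decorator

The real decorator also checks serve_settings.telemetry_opt_out before sending; that check is omitted here for brevity.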
10 changes: 8 additions & 2 deletions src/sagemaker/session.py
@@ -243,6 +243,7 @@ def __init__(
# sagemaker_config is validated and initialized inside :func:`_initialize`,
# so if default_bucket is None and the sagemaker_config has a default S3 bucket configured,
# _default_bucket_name_override will be set again inside :func:`_initialize`.
self.endpoint_arn = None
self._default_bucket = None
self._default_bucket_name_override = default_bucket
# this may also be set again inside :func:`_initialize` if it is None
@@ -4284,9 +4285,12 @@ def create_endpoint(self, endpoint_name, config_name, tags=None, wait=True, live
tags, "{}.{}.{}".format(SAGEMAKER, ENDPOINT, TAGS)
)

self.sagemaker_client.create_endpoint(
res = self.sagemaker_client.create_endpoint(
EndpointName=endpoint_name, EndpointConfigName=config_name, Tags=tags
)
if res:
self.endpoint_arn = res["EndpointArn"]

if wait:
self.wait_for_endpoint(endpoint_name, live_logging=live_logging)
return endpoint_name
@@ -4344,9 +4348,11 @@ def update_endpoint(self, endpoint_name, endpoint_config_name, wait=True):
"existing endpoint name".format(endpoint_name)
)

self.sagemaker_client.update_endpoint(
res = self.sagemaker_client.update_endpoint(
EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
)
if res:
self.endpoint_arn = res["EndpointArn"]

if wait:
self.wait_for_endpoint(endpoint_name)
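As context for the two hunks above: the SageMaker CreateEndpoint/UpdateEndpoint responses include the endpoint's ARN, and the Session now caches it so the telemetry decorator can attach it as x-endpointArn. A hedged illustration with a made-up ARN and a stub session object:

# Illustration only; the session stub and ARN value are fabricated.
class _SessionStub:
    endpoint_arn = None

session = _SessionStub()
res = {"EndpointArn": "arn:aws:sagemaker:us-west-2:111122223333:endpoint/my-endpoint"}
if res:
    session.endpoint_arn = res["EndpointArn"]  # later emitted as x-endpointArn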
23 changes: 23 additions & 0 deletions tests/unit/sagemaker/serve/utils/test_telemetry_logger.py
@@ -20,6 +20,7 @@
_construct_url,
)
from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException
from sagemaker.user_agent import SDK_VERSION

MOCK_SESSION = Mock()
MOCK_FUNC_NAME = "Mock.deploy"
@@ -32,6 +33,7 @@
)
MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf"
MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex")
MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test"


class ModelBuilderMock:
@@ -72,15 +74,22 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry):
mock_model_builder.model = MOCK_HUGGINGFACE_ID
mock_model_builder.mode = Mode.LOCAL_CONTAINER
mock_model_builder.model_server = ModelServer.DJL_SERVING
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN

mock_model_builder.mock_deploy()

args = mock_send_telemetry.call_args.args
latency = str(args[5]).split("latency=")[1]
expected_extra_str = (
f"{MOCK_FUNC_NAME}"
"&x-modelServer=4"
"&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
f"&x-sdkVersion={SDK_VERSION}"
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
f"&x-latency={latency}"
)

mock_send_telemetry.assert_called_once_with(
"1", 2, MOCK_SESSION, None, None, expected_extra_str
)
@@ -93,15 +102,22 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry):
mock_model_builder.model = MOCK_HUGGINGFACE_ID
mock_model_builder.mode = Mode.LOCAL_CONTAINER
mock_model_builder.model_server = ModelServer.TGI
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN

mock_model_builder.mock_deploy()

args = mock_send_telemetry.call_args.args
latency = str(args[5]).split("latency=")[1]
expected_extra_str = (
f"{MOCK_FUNC_NAME}"
"&x-modelServer=6"
"&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04"
f"&x-sdkVersion={SDK_VERSION}"
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
f"&x-latency={latency}"
)

mock_send_telemetry.assert_called_once_with(
"1", 2, MOCK_SESSION, None, None, expected_extra_str
)
@@ -126,6 +142,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
mock_model_builder.model = MOCK_HUGGINGFACE_ID
mock_model_builder.mode = Mode.LOCAL_CONTAINER
mock_model_builder.model_server = ModelServer.DJL_SERVING
mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN

mock_exception = Mock()
mock_exception_obj = MOCK_EXCEPTION
@@ -134,12 +151,18 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te
with self.assertRaises(ModelBuilderException) as _:
mock_model_builder.mock_deploy(mock_exception)

args = mock_send_telemetry.call_args.args
latency = str(args[5]).split("latency=")[1]
expected_extra_str = (
f"{MOCK_FUNC_NAME}"
"&x-modelServer=4"
"&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118"
f"&x-sdkVersion={SDK_VERSION}"
f"&x-modelName={MOCK_HUGGINGFACE_ID}"
f"&x-endpointArn={MOCK_ENDPOINT_ARN}"
f"&x-latency={latency}"
)

mock_send_telemetry.assert_called_once_with(
"0",
2,
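Because the measured latency varies from run to run, the updated tests recover it from the mocked _send_telemetry call args before composing the expected string. A small illustrative example of that parsing step, with a fabricated extra string standing in for args[5]:

# Illustrative only; the extra string below is a made-up sample.
extra = "Mock.deploy&x-modelServer=4&x-imageTag=djl-inference:0.25.0&x-latency=0.42"
latency = str(extra).split("latency=")[1]  # "0.42", mirrors the args[5] parsing in the tests
assert f"&x-latency={latency}" in extra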