From 82bd803f253aecbaf247e016d706e4a731a00ffe Mon Sep 17 00:00:00 2001
From: Dana Benson
Date: Fri, 12 Sep 2025 17:38:14 -0700
Subject: [PATCH] fix: handle trial component status message longer than API
 supports

When a run exits with an exception, the exception text is stored as the
trial component status message. Truncate that text to
MAX_STATUS_MESSAGE_LEN (1024) characters so an overly long exception
message no longer exceeds the length the API supports for this field.

A unit test covers an exception message of 1026 characters.
---
 src/sagemaker/experiments/run.py             |  4 +++-
 tests/unit/sagemaker/experiments/test_run.py | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/sagemaker/experiments/run.py b/src/sagemaker/experiments/run.py
index 33f2f0bbdc..cea6043eb0 100644
--- a/src/sagemaker/experiments/run.py
+++ b/src/sagemaker/experiments/run.py
@@ -68,6 +68,8 @@
 TRIAL_NAME_TEMPLATE = "Default-Run-Group-{}"
 MAX_RUN_TC_ARTIFACTS_LEN = 30
 MAX_NAME_LEN_IN_BACKEND = 120
+# Maximum length of a trial component status message supported by the API.
+MAX_STATUS_MESSAGE_LEN = 1024
 EXPERIMENT_NAME = "ExperimentName"
 TRIAL_NAME = "TrialName"
 RUN_NAME = "RunName"
@@ -759,7 +761,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
         if exc_value:
             self._trial_component.status = _api_types.TrialComponentStatus(
                 primary_status=_TrialComponentStatusType.Failed.value,
-                message=str(exc_value),
+                message=str(exc_value)[:MAX_STATUS_MESSAGE_LEN],
             )
         else:
             self._trial_component.status = _api_types.TrialComponentStatus(
diff --git a/tests/unit/sagemaker/experiments/test_run.py b/tests/unit/sagemaker/experiments/test_run.py
index 2bebbe3d9c..5b72cca41b 100644
--- a/tests/unit/sagemaker/experiments/test_run.py
+++ b/tests/unit/sagemaker/experiments/test_run.py
@@ -1078,6 +1078,22 @@ def test_exit_fail(sagemaker_session, run_obj):
     assert isinstance(run_obj._trial_component.end_time, datetime.datetime)
 
 
+def test_exit_fail_message_too_long(sagemaker_session, run_obj):
+    sagemaker_session.sagemaker_client.update_trial_component.return_value = {}
+    # create an error message that is longer than the max status message length of 1024
+    # 3 x 342 = 1026
+    too_long_error_message = "Foo" * 342
+    try:
+        with run_obj:
+            raise ValueError(too_long_error_message)
+    except ValueError:
+        pass
+
+    assert run_obj._trial_component.status.primary_status == _TrialComponentStatusType.Failed.value
+    assert run_obj._trial_component.status.message == too_long_error_message[:1024]
+    assert isinstance(run_obj._trial_component.end_time, datetime.datetime)
+
+
 @pytest.mark.parametrize(
     "metric_value",
     [1.3, "nan", "inf", "-inf", None],