Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion sagemaker-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ description = "An python package for sagemaker core functionalities"
authors = [
{name = "AWS", email = "sagemaker-interests@amazon.com"}
]
readme = "README.rst"
readme = "README.rst"
dependencies = [
# Add your dependencies here (Include lower and upper bounds as applicable)
"boto3>=1.42.2,<2.0.0",
Expand All @@ -34,6 +34,10 @@ dependencies = [
"omegaconf>=2.1.0",
"torch>=1.9.0",
"scipy>=1.5.0",
# Remote function dependencies
"cloudpickle>=2.0.0",
"paramiko>=2.11.0",
"tblib>=1.7.0",
]
requires-python = ">=3.9"
classifiers = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,8 @@ def retrieve_pytorch_uri(

return ECR_URI_TEMPLATE.format(registry=registry, hostname=hostname, repository=repo)

@override_pipeline_parameter_var
@staticmethod
@override_pipeline_parameter_var
def retrieve(
framework: str,
region: str,
Expand Down
7 changes: 4 additions & 3 deletions sagemaker-core/src/sagemaker/core/training/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,15 +257,16 @@ class InputData(BaseConfig):
Parameters:
channel_name (StrPipeVar):
The name of the input data source channel.
data_source (Union[str, S3DataSource, FileSystemDataSource, DatasetSource]):
data_source (Union[StrPipeVar, S3DataSource, FileSystemDataSource, DatasetSource]):
The data source for the channel. Can be an S3 URI string, local file path string,
S3DataSource object, or FileSystemDataSource object.
S3DataSource object, FileSystemDataSource object, DatasetSource object, or a
pipeline variable (Properties) from a previous step.
content_type (StrPipeVar):
The MIME type of the data.
"""

channel_name: StrPipeVar = None
data_source: Union[str, FileSystemDataSource, S3DataSource, DatasetSource] = None
data_source: Union[StrPipeVar, FileSystemDataSource, S3DataSource, DatasetSource] = None
content_type: StrPipeVar = None


Expand Down
4 changes: 4 additions & 0 deletions sagemaker-core/tests/integ/jumpstart/test_search_integ.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from sagemaker.core.resources import HubContent


@pytest.mark.serial
@pytest.mark.integ
def test_search_public_hub_models_default_args():
# Only query, uses default hub name and session
Expand All @@ -30,6 +31,7 @@ def test_search_public_hub_models_default_args():
assert len(results) > 0, "Expected at least one matching model from the public hub"


@pytest.mark.serial
@pytest.mark.integ
def test_search_public_hub_models_custom_session():
# Provide a custom SageMaker session
Expand All @@ -41,6 +43,7 @@ def test_search_public_hub_models_custom_session():
assert all(isinstance(m, HubContent) for m in results)


@pytest.mark.serial
@pytest.mark.integ
def test_search_public_hub_models_custom_hub_name():
# Using the default public hub but provided explicitly
Expand All @@ -51,6 +54,7 @@ def test_search_public_hub_models_custom_hub_name():
assert all(isinstance(m, HubContent) for m in results)


@pytest.mark.serial
@pytest.mark.integ
def test_search_public_hub_models_all_args():
# Provide both hub_name and session explicitly
Expand Down
3 changes: 3 additions & 0 deletions sagemaker-core/tests/unit/local/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ def test_process_with_multiple_inputs(self, mock_session):
"test-job",
)

@pytest.mark.skip(reason="Requires sagemaker-serve module which is not installed in sagemaker-core tests")
def test_train_with_multiple_channels(self, mock_session):
"""Test train method with multiple input channels"""
with patch(
Expand Down Expand Up @@ -701,6 +702,7 @@ def test_train_with_multiple_channels(self, mock_session):
== "/tmp/model.tar.gz"
)

@pytest.mark.skip(reason="Requires sagemaker-serve module which is not installed in sagemaker-core tests")
def test_serve_with_environment_variables(self, mock_session):
"""Test serve method with environment variables"""
with patch(
Expand Down Expand Up @@ -859,6 +861,7 @@ def test_write_config_files(self, mock_session):

assert mock_write.call_count == 3 # hyperparameters, resourceconfig, inputdataconfig

@pytest.mark.skip(reason="Requires sagemaker-serve module which is not installed in sagemaker-core tests")
def test_prepare_training_volumes_with_local_code(self, mock_session):
"""Test _prepare_training_volumes with local code directory"""
with patch(
Expand Down
14 changes: 9 additions & 5 deletions sagemaker-core/tests/unit/remote_function/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import os
import pytest
import sys
from unittest.mock import Mock, patch, MagicMock, call
from unittest.mock import Mock, patch, MagicMock, call, mock_open
from io import BytesIO

from sagemaker.core.remote_function.job import (
Expand Down Expand Up @@ -632,8 +632,9 @@ class TestPrepareAndUploadRuntimeScripts:
@patch("sagemaker.core.remote_function.job.S3Uploader")
@patch("sagemaker.core.remote_function.job._tmpdir")
@patch("sagemaker.core.remote_function.job.shutil")
@patch("builtins.open", new_callable=mock_open)
def test_without_spark_or_distributed(
self, mock_shutil, mock_tmpdir, mock_uploader, mock_session
self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session
):
"""Test without Spark or distributed training."""
mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test")
Expand All @@ -649,7 +650,8 @@ def test_without_spark_or_distributed(
@patch("sagemaker.core.remote_function.job.S3Uploader")
@patch("sagemaker.core.remote_function.job._tmpdir")
@patch("sagemaker.core.remote_function.job.shutil")
def test_with_spark(self, mock_shutil, mock_tmpdir, mock_uploader, mock_session):
@patch("builtins.open", new_callable=mock_open)
def test_with_spark(self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session):
"""Test with Spark config."""
mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test")
mock_tmpdir.return_value.__exit__ = Mock(return_value=False)
Expand All @@ -665,7 +667,8 @@ def test_with_spark(self, mock_shutil, mock_tmpdir, mock_uploader, mock_session)
@patch("sagemaker.core.remote_function.job.S3Uploader")
@patch("sagemaker.core.remote_function.job._tmpdir")
@patch("sagemaker.core.remote_function.job.shutil")
def test_with_torchrun(self, mock_shutil, mock_tmpdir, mock_uploader, mock_session):
@patch("builtins.open", new_callable=mock_open)
def test_with_torchrun(self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session):
"""Test with torchrun."""
mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test")
mock_tmpdir.return_value.__exit__ = Mock(return_value=False)
Expand All @@ -680,7 +683,8 @@ def test_with_torchrun(self, mock_shutil, mock_tmpdir, mock_uploader, mock_sessi
@patch("sagemaker.core.remote_function.job.S3Uploader")
@patch("sagemaker.core.remote_function.job._tmpdir")
@patch("sagemaker.core.remote_function.job.shutil")
def test_with_mpirun(self, mock_shutil, mock_tmpdir, mock_uploader, mock_session):
@patch("builtins.open", new_callable=mock_open)
def test_with_mpirun(self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session):
"""Test with mpirun."""
mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test")
mock_tmpdir.return_value.__exit__ = Mock(return_value=False)
Expand Down
17 changes: 16 additions & 1 deletion sagemaker-core/tests/unit/telemetry/test_telemetry_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,18 @@
PYTHON_VERSION,
)
from sagemaker.core.user_agent import SDK_VERSION, process_studio_metadata_file
from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException

# Try to import sagemaker-serve exceptions, skip tests if not available
try:
from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException
SAGEMAKER_SERVE_AVAILABLE = True
except ImportError:
SAGEMAKER_SERVE_AVAILABLE = False
# Create mock exceptions for type hints
class ModelBuilderException(Exception):
pass
class LocalModelOutOfMemoryException(Exception):
pass

MOCK_SESSION = Mock()
MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex")
Expand Down Expand Up @@ -147,6 +158,10 @@ def test_telemetry_emitter_decorator_success(
1, [1, 2], MOCK_SESSION, None, None, expected_extra_str
)

@pytest.mark.skipif(
not SAGEMAKER_SERVE_AVAILABLE,
reason="Requires sagemaker-serve package"
)
@patch("sagemaker.core.telemetry.telemetry_logging._send_telemetry_request")
@patch("sagemaker.core.telemetry.telemetry_logging.resolve_value_from_config")
def test_telemetry_emitter_decorator_handle_exception_success(
Expand Down
1 change: 1 addition & 0 deletions sagemaker-core/tests/unit/test_jumpstart_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,6 +1479,7 @@ def test_add_instance_rate_stats_none_metrics(self):
result = utils.add_instance_rate_stats_to_benchmark_metrics("us-west-2", None)
assert result is None

@pytest.mark.skip(reason="Requires AWS Pricing API permissions which are not available in CI environment")
@patch("sagemaker.core.common_utils.get_instance_rate_per_hour")
def test_add_instance_rate_stats_success(self, mock_get_rate):
"""Test successfully adding instance rate stats"""
Expand Down
3 changes: 3 additions & 0 deletions sagemaker-core/tests/unit/workflow/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def to_request(self):
class TestWorkflowUtilities:
"""Test cases for workflow utility functions"""

@pytest.mark.skip(reason="Requires sagemaker-mlops module which is not installed in sagemaker-core tests")
def test_list_to_request_with_entities(self):
"""Test list_to_request with Entity objects"""
entities = [MockEntity(), MockEntity()]
Expand All @@ -53,6 +54,7 @@ def test_list_to_request_with_entities(self):
assert len(result) == 2
assert all(item["Type"] == "MockEntity" for item in result)

@pytest.mark.skip(reason="Requires sagemaker-mlops module which is not installed in sagemaker-core tests")
def test_list_to_request_with_step_collection(self):
"""Test list_to_request with StepCollection"""
from sagemaker.mlops.workflow.step_collections import StepCollection
Expand All @@ -64,6 +66,7 @@ def test_list_to_request_with_step_collection(self):

assert len(result) == 2

@pytest.mark.skip(reason="Requires sagemaker-mlops module which is not installed in sagemaker-core tests")
def test_list_to_request_mixed(self):
"""Test list_to_request with mixed entities and collections"""
from sagemaker.mlops.workflow.step_collections import StepCollection
Expand Down
1 change: 1 addition & 0 deletions sagemaker-core/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ markers =
release
image_uris_unit_test
timeout: mark a test as a timeout.
serial: marks tests that must run serially (not in parallel)

[testenv]
setenv =
Expand Down
2 changes: 1 addition & 1 deletion sagemaker-mlops/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
name = "sagemaker-mlops"
dynamic = ["version"]
description = "SageMaker MLOps package for workflow orchestration and model building"
readme = "README.md"
readme = "README.md"
requires-python = ">=3.9"
authors = [
{name = "Amazon Web Services"},
Expand Down
14 changes: 13 additions & 1 deletion sagemaker-mlops/tests/integ/test_pipeline_train_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,19 @@ def test_pipeline_with_train_and_registry(sagemaker_session, pipeline_session, r
assert execution_status == "Succeeded"
break
elif execution_status in ["Failed", "Stopped"]:
pytest.fail(f"Pipeline execution {execution_status}")
# Get detailed failure information
steps = sagemaker_session.sagemaker_client.list_pipeline_execution_steps(
PipelineExecutionArn=execution_desc["PipelineExecutionArn"]
)["PipelineExecutionSteps"]

failed_steps = []
for step in steps:
if step.get("StepStatus") == "Failed":
failure_reason = step.get("FailureReason", "Unknown reason")
failed_steps.append(f"{step['StepName']}: {failure_reason}")

failure_details = "\n".join(failed_steps) if failed_steps else "No detailed failure information available"
pytest.fail(f"Pipeline execution {execution_status}. Failed steps:\n{failure_details}")

time.sleep(60)
else:
Expand Down
3 changes: 1 addition & 2 deletions sagemaker-mlops/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ allowlist_externals =
commands =
python -c "import os; os.system('install-custom-pkgs --install-boto-wheels')"
pip install 'apache-airflow==2.10.4' --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.4/constraints-3.9.txt"
pip install 'torch==2.3.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
pip install 'torchvision==0.18.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
pip install 'torch==2.8.0' 'torchvision==0.23.0'
pip install 'dill>=0.3.9'

pytest {posargs}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def __init__(
):
self._thread = None
self._loop = None
self._stop_event = asyncio.Event()
self._shutdown_event = threading.Event()
self._router = APIRouter()
self._task = task
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -577,13 +577,15 @@ def training_job(self, setup_region):
session=session,
region="us-east-1")

@pytest.mark.skip(reason="Bedrock Nova deployment test skipped per team decision")
def test_bedrock_model_builder_creation(self, training_job):
"""Test BedrockModelBuilder creation with Nova model."""
bedrock_builder = BedrockModelBuilder(model=training_job)
assert bedrock_builder is not None
assert bedrock_builder.model == training_job
assert bedrock_builder.s3_model_artifacts is not None

@pytest.mark.skip(reason="Bedrock Nova deployment test skipped per team decision")
@pytest.mark.slow
def test_nova_model_deployment(self, training_job):
"""Test Nova model deployment to Bedrock."""
Expand Down
8 changes: 1 addition & 7 deletions sagemaker-serve/tests/integ/test_tei_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@
MODEL_NAME_PREFIX = "tei-test-model"
ENDPOINT_NAME_PREFIX = "tei-test-endpoint"

# Configuration from backup file
AWS_REGION = "us-east-2"


@pytest.mark.slow_test
def test_tei_build_deploy_invoke_cleanup():
Expand Down Expand Up @@ -81,8 +78,6 @@ def build_and_deploy():
hf_model_id = MODEL_ID

schema_builder = create_schema_builder()
boto_session = boto3.Session(region_name=AWS_REGION)
sagemaker_session = Session(boto_session=boto_session)
unique_id = str(uuid.uuid4())[:8]

compute = Compute(
Expand All @@ -94,7 +89,6 @@ def build_and_deploy():
model=hf_model_id, # Use HuggingFace model string
model_server=ModelServer.TEI,
schema_builder=schema_builder,
sagemaker_session=sagemaker_session,
compute=compute,
)

Expand All @@ -104,7 +98,7 @@ def build_and_deploy():

core_endpoint = model_builder.deploy(
endpoint_name=f"{ENDPOINT_NAME_PREFIX}-{unique_id}",
initial_instance_count=1
initial_instance_count=1,
)
logger.info(f"Endpoint Successfully Created: {core_endpoint.endpoint_name}")

Expand Down
8 changes: 1 addition & 7 deletions sagemaker-serve/tests/integ/test_tgi_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@
MODEL_NAME_PREFIX = "tgi-test-model"
ENDPOINT_NAME_PREFIX = "tgi-test-endpoint"

# Configuration from backup file
AWS_REGION = "us-east-2"


@pytest.mark.slow_test
def test_tgi_build_deploy_invoke_cleanup():
Expand Down Expand Up @@ -81,8 +78,6 @@ def build_and_deploy():
hf_model_id = MODEL_ID

schema_builder = create_schema_builder()
boto_session = boto3.Session(region_name=AWS_REGION)
sagemaker_session = Session(boto_session=boto_session)
unique_id = str(uuid.uuid4())[:8]

compute = Compute(
Expand All @@ -101,7 +96,6 @@ def build_and_deploy():
model=hf_model_id, # Use HuggingFace model string
model_server=ModelServer.TGI,
schema_builder=schema_builder,
sagemaker_session=sagemaker_session,
compute=compute,
env_vars=env_vars
)
Expand All @@ -112,7 +106,7 @@ def build_and_deploy():

core_endpoint = model_builder.deploy(
endpoint_name=f"{ENDPOINT_NAME_PREFIX}-{unique_id}",
initial_instance_count=1
initial_instance_count=1,
)
logger.info(f"Endpoint Successfully Created: {core_endpoint.endpoint_name}")

Expand Down
30 changes: 16 additions & 14 deletions sagemaker-serve/tests/unit/test_model_builder_servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,20 +414,22 @@ def test_all_supported_model_servers_have_routes(self):
"""Test that all supported model servers have corresponding build methods."""
from sagemaker.serve.model_builder_servers import _ModelBuilderServers

# Map of model servers to their expected build methods
server_method_map = {
ModelServer.TORCHSERVE: '_build_for_torchserve',
ModelServer.TRITON: '_build_for_triton',
ModelServer.TENSORFLOW_SERVING: '_build_for_tensorflow_serving',
ModelServer.DJL_SERVING: '_build_for_djl',
ModelServer.TEI: '_build_for_tei',
ModelServer.TGI: '_build_for_tgi',
ModelServer.MMS: '_build_for_transformers',
ModelServer.SMD: '_build_for_smd',
}

for model_server, method_name in server_method_map.items():
with self.subTest(model_server=model_server):
# Map of model servers to their expected build methods using string values
# to avoid enum serialization issues with pytest-xdist
server_method_map = [
(ModelServer.TORCHSERVE, '_build_for_torchserve'),
(ModelServer.TRITON, '_build_for_triton'),
(ModelServer.TENSORFLOW_SERVING, '_build_for_tensorflow_serving'),
(ModelServer.DJL_SERVING, '_build_for_djl'),
(ModelServer.TEI, '_build_for_tei'),
(ModelServer.TGI, '_build_for_tgi'),
(ModelServer.MMS, '_build_for_transformers'),
(ModelServer.SMD, '_build_for_smd'),
]

for model_server, method_name in server_method_map:
# Use enum.name instead of enum itself for subTest to avoid serialization
with self.subTest(model_server=model_server.name):
self.mock_builder.model_server = model_server

# Mock the specific build method
Expand Down
Loading
Loading