From 5741f33d4fa92af5c3c46e058bceec23899e5e0d Mon Sep 17 00:00:00 2001 From: sagemaker-bot Date: Thu, 27 Feb 2025 10:09:29 +0000 Subject: [PATCH] Daily Sync with Botocore v1.37.2 on 2025/02/27 --- sample/sagemaker/2017-07-24/service-2.json | 85 +++++++++++++++++-- .../main/code_injection/shape_dag.py | 53 ++++++++++++ src/sagemaker_core/main/resources.py | 9 +- src/sagemaker_core/main/shapes.py | 57 ++++++++++++- 4 files changed, 192 insertions(+), 12 deletions(-) diff --git a/sample/sagemaker/2017-07-24/service-2.json b/sample/sagemaker/2017-07-24/service-2.json index 5edc7d99..84607689 100644 --- a/sample/sagemaker/2017-07-24/service-2.json +++ b/sample/sagemaker/2017-07-24/service-2.json @@ -6513,7 +6513,7 @@ "BatchDeleteClusterNodesErrorList":{ "type":"list", "member":{"shape":"BatchDeleteClusterNodesError"}, - "max":99, + "max":3000, "min":1 }, "BatchDeleteClusterNodesRequest":{ @@ -6529,7 +6529,7 @@ }, "NodeIds":{ "shape":"ClusterNodeIds", - "documentation":"

A list of node IDs to be deleted from the specified cluster.

For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.

" + "documentation":"

A list of node IDs to be deleted from the specified cluster.

" } } }, @@ -7986,7 +7986,7 @@ "ClusterNodeIds":{ "type":"list", "member":{"shape":"ClusterNodeId"}, - "max":99, + "max":3000, "min":1 }, "ClusterNodeRecovery":{ @@ -15959,6 +15959,10 @@ "InferenceComponentStatus":{ "shape":"InferenceComponentStatus", "documentation":"

The status of the inference component.

" + }, + "LastDeploymentConfig":{ + "shape":"InferenceComponentDeploymentConfig", + "documentation":"

The deployment and rollback settings that you assigned to the inference component.

" } } }, @@ -19977,7 +19981,7 @@ "documentation":"

The list of tags that are associated with the experiment. You can use Search API to search on the tags.

" } }, - "documentation":"

The properties of an experiment as returned by the Search API.

" + "documentation":"

The properties of an experiment as returned by the Search API. For information about experiments, see the CreateExperiment API.

" }, "ExperimentArn":{ "type":"string", @@ -22669,6 +22673,31 @@ "max":2048, "min":20 }, + "InferenceComponentCapacitySize":{ + "type":"structure", + "required":[ + "Type", + "Value" + ], + "members":{ + "Type":{ + "shape":"InferenceComponentCapacitySizeType", + "documentation":"

Specifies the endpoint capacity type.

COPY_COUNT

The endpoint activates based on the number of inference component copies.

CAPACITY_PERCENT

The endpoint activates based on the specified percentage of capacity.

" + }, + "Value":{ + "shape":"CapacitySizeValue", + "documentation":"

Defines the capacity size, either as a number of inference component copies or a capacity percentage.

" + } + }, + "documentation":"

Specifies the type and size of the endpoint capacity to activate for a rolling deployment or a rollback strategy. You can specify your batches as either of the following:

For a rollback strategy, if you don't specify the fields in this object, or if you set the Value parameter to 100%, then SageMaker AI uses a blue/green rollback strategy and rolls all traffic back to the blue fleet.

" + }, + "InferenceComponentCapacitySizeType":{ + "type":"string", + "enum":[ + "COPY_COUNT", + "CAPACITY_PERCENT" + ] + }, "InferenceComponentComputeResourceRequirements":{ "type":"structure", "required":["MinMemoryRequiredInMb"], @@ -22729,6 +22758,18 @@ "type":"integer", "min":0 }, + "InferenceComponentDeploymentConfig":{ + "type":"structure", + "required":["RollingUpdatePolicy"], + "members":{ + "RollingUpdatePolicy":{ + "shape":"InferenceComponentRollingUpdatePolicy", + "documentation":"

Specifies a rolling deployment strategy for updating a SageMaker AI endpoint.

" + }, + "AutoRollbackConfiguration":{"shape":"AutoRollbackConfig"} + }, + "documentation":"

The deployment configuration for an endpoint that hosts inference components. The configuration includes the desired deployment strategy and rollback settings.

" + }, "InferenceComponentName":{ "type":"string", "max":63, @@ -22739,6 +22780,32 @@ "max":63, "pattern":"[a-zA-Z0-9-]+" }, + "InferenceComponentRollingUpdatePolicy":{ + "type":"structure", + "required":[ + "MaximumBatchSize", + "WaitIntervalInSeconds" + ], + "members":{ + "MaximumBatchSize":{ + "shape":"InferenceComponentCapacitySize", + "documentation":"

The batch size for each rolling step in the deployment process. For each step, SageMaker AI provisions capacity on the new endpoint fleet, routes traffic to that fleet, and terminates capacity on the old endpoint fleet. The value must be between 5% to 50% of the copy count of the inference component.

" + }, + "WaitIntervalInSeconds":{ + "shape":"WaitIntervalInSeconds", + "documentation":"

The length of the baking period, during which SageMaker AI monitors alarms for each batch on the new fleet.

" + }, + "MaximumExecutionTimeoutInSeconds":{ + "shape":"MaximumExecutionTimeoutInSeconds", + "documentation":"

The time limit for the total deployment. Exceeding this limit causes a timeout.

" + }, + "RollbackMaximumBatchSize":{ + "shape":"InferenceComponentCapacitySize", + "documentation":"

The batch size for a rollback to the old endpoint fleet. If this field is absent, the value is set to the default, which is 100% of the total capacity. When the default is used, SageMaker AI provisions the entire capacity of the old fleet at once during rollback.

" + } + }, + "documentation":"

Specifies a rolling deployment strategy for updating a SageMaker AI inference component.

" + }, "InferenceComponentRuntimeConfig":{ "type":"structure", "required":["CopyCount"], @@ -29780,7 +29847,7 @@ "members":{ "ModelPackageName":{ "shape":"EntityName", - "documentation":"

The name of the model.

" + "documentation":"

The name of the model package. The name can be as follows:

" }, "ModelPackageGroupName":{ "shape":"EntityName", @@ -29897,7 +29964,7 @@ "documentation":"

Indicates if you want to skip model validation.

" } }, - "documentation":"

A versioned model that can be deployed for SageMaker inference.

" + "documentation":"

A container for your trained model that can be deployed for SageMaker inference. This can include inference code, artifacts, and metadata. The model package type can be one of the following.

For more information, see CreateModelPackage .

" }, "ModelPackageArn":{ "type":"string", @@ -30011,7 +30078,7 @@ "documentation":"

A list of the tags associated with the model group. For more information, see Tagging Amazon Web Services resources in the Amazon Web Services General Reference Guide.

" } }, - "documentation":"

A group of versioned models in the model registry.

" + "documentation":"

A group of versioned models in the Model Registry.

" }, "ModelPackageGroupArn":{ "type":"string", @@ -40889,6 +40956,10 @@ "RuntimeConfig":{ "shape":"InferenceComponentRuntimeConfig", "documentation":"

Runtime settings for a model that is deployed with an inference component.

" + }, + "DeploymentConfig":{ + "shape":"InferenceComponentDeploymentConfig", + "documentation":"

The deployment configuration for the inference component. The configuration contains the desired deployment strategy and rollback settings.

" } } }, diff --git a/src/sagemaker_core/main/code_injection/shape_dag.py b/src/sagemaker_core/main/code_injection/shape_dag.py index 177124ed..ef2c6349 100644 --- a/src/sagemaker_core/main/code_injection/shape_dag.py +++ b/src/sagemaker_core/main/code_injection/shape_dag.py @@ -4911,6 +4911,11 @@ "shape": "InferenceComponentStatus", "type": "string", }, + { + "name": "LastDeploymentConfig", + "shape": "InferenceComponentDeploymentConfig", + "type": "structure", + }, ], "type": "structure", }, @@ -7670,6 +7675,13 @@ ], "type": "structure", }, + "InferenceComponentCapacitySize": { + "members": [ + {"name": "Type", "shape": "InferenceComponentCapacitySizeType", "type": "string"}, + {"name": "Value", "shape": "CapacitySizeValue", "type": "integer"}, + ], + "type": "structure", + }, "InferenceComponentComputeResourceRequirements": { "members": [ {"name": "NumberOfCpuCoresRequired", "shape": "NumberOfCpuCores", "type": "float"}, @@ -7699,6 +7711,42 @@ ], "type": "structure", }, + "InferenceComponentDeploymentConfig": { + "members": [ + { + "name": "RollingUpdatePolicy", + "shape": "InferenceComponentRollingUpdatePolicy", + "type": "structure", + }, + { + "name": "AutoRollbackConfiguration", + "shape": "AutoRollbackConfig", + "type": "structure", + }, + ], + "type": "structure", + }, + "InferenceComponentRollingUpdatePolicy": { + "members": [ + { + "name": "MaximumBatchSize", + "shape": "InferenceComponentCapacitySize", + "type": "structure", + }, + {"name": "WaitIntervalInSeconds", "shape": "WaitIntervalInSeconds", "type": "integer"}, + { + "name": "MaximumExecutionTimeoutInSeconds", + "shape": "MaximumExecutionTimeoutInSeconds", + "type": "integer", + }, + { + "name": "RollbackMaximumBatchSize", + "shape": "InferenceComponentCapacitySize", + "type": "structure", + }, + ], + "type": "structure", + }, "InferenceComponentRuntimeConfig": { "members": [ {"name": "CopyCount", "shape": "InferenceComponentCopyCount", "type": "integer"} @@ -15004,6 +15052,11 @@ "shape": "InferenceComponentRuntimeConfig", "type": "structure", }, + { + "name": "DeploymentConfig", + "shape": "InferenceComponentDeploymentConfig", + "type": "structure", + }, ], "type": "structure", }, diff --git a/src/sagemaker_core/main/resources.py b/src/sagemaker_core/main/resources.py index ebbc2c2e..9853952b 100644 --- a/src/sagemaker_core/main/resources.py +++ b/src/sagemaker_core/main/resources.py @@ -3874,7 +3874,7 @@ def batch_delete_nodes( Deletes specific nodes within a SageMaker HyperPod cluster. Parameters: - node_ids: A list of node IDs to be deleted from the specified cluster. For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes. + node_ids: A list of node IDs to be deleted from the specified cluster. For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes. If you need to delete more than 99 instances, contact Support for assistance. session: Boto3 session. region: Region name. @@ -14801,6 +14801,7 @@ class InferenceComponent(Base): specification: Details about the resources that are deployed with this inference component. runtime_config: Details about the runtime settings for the model that is deployed with the inference component. inference_component_status: The status of the inference component. + last_deployment_config: The deployment and rollback settings that you assigned to the inference component. """ @@ -14815,6 +14816,7 @@ class InferenceComponent(Base): creation_time: Optional[datetime.datetime] = Unassigned() last_modified_time: Optional[datetime.datetime] = Unassigned() inference_component_status: Optional[str] = Unassigned() + last_deployment_config: Optional[InferenceComponentDeploymentConfig] = Unassigned() def get_name(self) -> str: attributes = vars(self) @@ -14999,10 +15001,14 @@ def update( self, specification: Optional[InferenceComponentSpecification] = Unassigned(), runtime_config: Optional[InferenceComponentRuntimeConfig] = Unassigned(), + deployment_config: Optional[InferenceComponentDeploymentConfig] = Unassigned(), ) -> Optional["InferenceComponent"]: """ Update a InferenceComponent resource + Parameters: + deployment_config: The deployment configuration for the inference component. The configuration contains the desired deployment strategy and rollback settings. + Returns: The InferenceComponent resource. @@ -15026,6 +15032,7 @@ def update( "InferenceComponentName": self.inference_component_name, "Specification": specification, "RuntimeConfig": runtime_config, + "DeploymentConfig": deployment_config, } logger.debug(f"Input request: {operation_input_args}") # serialize the input request diff --git a/src/sagemaker_core/main/shapes.py b/src/sagemaker_core/main/shapes.py index 1b356859..ddcc9bbd 100644 --- a/src/sagemaker_core/main/shapes.py +++ b/src/sagemaker_core/main/shapes.py @@ -8404,6 +8404,55 @@ class InferenceComponentRuntimeConfigSummary(Base): current_copy_count: Optional[int] = Unassigned() +class InferenceComponentCapacitySize(Base): + """ + InferenceComponentCapacitySize + Specifies the type and size of the endpoint capacity to activate for a rolling deployment or a rollback strategy. You can specify your batches as either of the following: A count of inference component copies The overall percentage or your fleet For a rollback strategy, if you don't specify the fields in this object, or if you set the Value parameter to 100%, then SageMaker AI uses a blue/green rollback strategy and rolls all traffic back to the blue fleet. + + Attributes + ---------------------- + type: Specifies the endpoint capacity type. COPY_COUNT The endpoint activates based on the number of inference component copies. CAPACITY_PERCENT The endpoint activates based on the specified percentage of capacity. + value: Defines the capacity size, either as a number of inference component copies or a capacity percentage. + """ + + type: str + value: int + + +class InferenceComponentRollingUpdatePolicy(Base): + """ + InferenceComponentRollingUpdatePolicy + Specifies a rolling deployment strategy for updating a SageMaker AI inference component. + + Attributes + ---------------------- + maximum_batch_size: The batch size for each rolling step in the deployment process. For each step, SageMaker AI provisions capacity on the new endpoint fleet, routes traffic to that fleet, and terminates capacity on the old endpoint fleet. The value must be between 5% to 50% of the copy count of the inference component. + wait_interval_in_seconds: The length of the baking period, during which SageMaker AI monitors alarms for each batch on the new fleet. + maximum_execution_timeout_in_seconds: The time limit for the total deployment. Exceeding this limit causes a timeout. + rollback_maximum_batch_size: The batch size for a rollback to the old endpoint fleet. If this field is absent, the value is set to the default, which is 100% of the total capacity. When the default is used, SageMaker AI provisions the entire capacity of the old fleet at once during rollback. + """ + + maximum_batch_size: InferenceComponentCapacitySize + wait_interval_in_seconds: int + maximum_execution_timeout_in_seconds: Optional[int] = Unassigned() + rollback_maximum_batch_size: Optional[InferenceComponentCapacitySize] = Unassigned() + + +class InferenceComponentDeploymentConfig(Base): + """ + InferenceComponentDeploymentConfig + The deployment configuration for an endpoint that hosts inference components. The configuration includes the desired deployment strategy and rollback settings. + + Attributes + ---------------------- + rolling_update_policy: Specifies a rolling deployment strategy for updating a SageMaker AI endpoint. + auto_rollback_configuration + """ + + rolling_update_policy: InferenceComponentRollingUpdatePolicy + auto_rollback_configuration: Optional[AutoRollbackConfig] = Unassigned() + + class EndpointMetadata(Base): """ EndpointMetadata @@ -9638,7 +9687,7 @@ class EndpointSummary(Base): class Experiment(Base): """ Experiment - The properties of an experiment as returned by the Search API. + The properties of an experiment as returned by the Search API. For information about experiments, see the CreateExperiment API. Attributes ---------------------- @@ -11690,11 +11739,11 @@ class ModelDashboardModel(Base): class ModelPackage(Base): """ ModelPackage - A versioned model that can be deployed for SageMaker inference. + A container for your trained model that can be deployed for SageMaker inference. This can include inference code, artifacts, and metadata. The model package type can be one of the following. Versioned model: A part of a model package group in Model Registry. Unversioned model: Not part of a model package group and used in Amazon Web Services Marketplace. For more information, see CreateModelPackage . Attributes ---------------------- - model_package_name: The name of the model. + model_package_name: The name of the model package. The name can be as follows: For a versioned model, the name is automatically generated by SageMaker Model Registry and follows the format 'ModelPackageGroupName/ModelPackageVersion'. For an unversioned model, you must provide the name. model_package_group_name: The model group to which the model belongs. model_package_version: The version number of a versioned model. model_package_arn: The Amazon Resource Name (ARN) of the model package. @@ -11765,7 +11814,7 @@ class ModelPackage(Base): class ModelPackageGroup(Base): """ ModelPackageGroup - A group of versioned models in the model registry. + A group of versioned models in the Model Registry. Attributes ----------------------