Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 78 additions & 7 deletions sample/sagemaker/2017-07-24/service-2.json
Original file line number Diff line number Diff line change
Expand Up @@ -6513,7 +6513,7 @@
"BatchDeleteClusterNodesErrorList":{
"type":"list",
"member":{"shape":"BatchDeleteClusterNodesError"},
"max":99,
"max":3000,
"min":1
},
"BatchDeleteClusterNodesRequest":{
Expand All @@ -6529,7 +6529,7 @@
},
"NodeIds":{
"shape":"ClusterNodeIds",
"documentation":"<p>A list of node IDs to be deleted from the specified cluster.</p> <note> <p>For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.</p> </note>"
"documentation":"<p>A list of node IDs to be deleted from the specified cluster.</p> <note> <ul> <li> <p>For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.</p> </li> <li> <p>If you need to delete more than 99 instances, contact <a href=\"http://aws.amazon.com/contact-us/\">Support</a> for assistance.</p> </li> </ul> </note>"
}
}
},
Expand Down Expand Up @@ -7986,7 +7986,7 @@
"ClusterNodeIds":{
"type":"list",
"member":{"shape":"ClusterNodeId"},
"max":99,
"max":3000,
"min":1
},
"ClusterNodeRecovery":{
Expand Down Expand Up @@ -15959,6 +15959,10 @@
"InferenceComponentStatus":{
"shape":"InferenceComponentStatus",
"documentation":"<p>The status of the inference component.</p>"
},
"LastDeploymentConfig":{
"shape":"InferenceComponentDeploymentConfig",
"documentation":"<p>The deployment and rollback settings that you assigned to the inference component.</p>"
}
}
},
Expand Down Expand Up @@ -19977,7 +19981,7 @@
"documentation":"<p>The list of tags that are associated with the experiment. You can use <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API to search on the tags.</p>"
}
},
"documentation":"<p>The properties of an experiment as returned by the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API.</p>"
"documentation":"<p>The properties of an experiment as returned by the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API. For information about experiments, see the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateExperiment.html\">CreateExperiment</a> API.</p>"
},
"ExperimentArn":{
"type":"string",
Expand Down Expand Up @@ -22669,6 +22673,31 @@
"max":2048,
"min":20
},
"InferenceComponentCapacitySize":{
"type":"structure",
"required":[
"Type",
"Value"
],
"members":{
"Type":{
"shape":"InferenceComponentCapacitySizeType",
"documentation":"<p>Specifies the endpoint capacity type.</p> <dl> <dt>COPY_COUNT</dt> <dd> <p>The endpoint activates based on the number of inference component copies.</p> </dd> <dt>CAPACITY_PERCENT</dt> <dd> <p>The endpoint activates based on the specified percentage of capacity.</p> </dd> </dl>"
},
"Value":{
"shape":"CapacitySizeValue",
"documentation":"<p>Defines the capacity size, either as a number of inference component copies or a capacity percentage.</p>"
}
},
"documentation":"<p>Specifies the type and size of the endpoint capacity to activate for a rolling deployment or a rollback strategy. You can specify your batches as either of the following:</p> <ul> <li> <p>A count of inference component copies </p> </li> <li> <p>The overall percentage of your fleet </p> </li> </ul> <p>For a rollback strategy, if you don't specify the fields in this object, or if you set the <code>Value</code> parameter to 100%, then SageMaker AI uses a blue/green rollback strategy and rolls all traffic back to the blue fleet.</p>"
},
"InferenceComponentCapacitySizeType":{
"type":"string",
"enum":[
"COPY_COUNT",
"CAPACITY_PERCENT"
]
},
"InferenceComponentComputeResourceRequirements":{
"type":"structure",
"required":["MinMemoryRequiredInMb"],
Expand Down Expand Up @@ -22729,6 +22758,18 @@
"type":"integer",
"min":0
},
"InferenceComponentDeploymentConfig":{
"type":"structure",
"required":["RollingUpdatePolicy"],
"members":{
"RollingUpdatePolicy":{
"shape":"InferenceComponentRollingUpdatePolicy",
"documentation":"<p>Specifies a rolling deployment strategy for updating a SageMaker AI endpoint.</p>"
},
"AutoRollbackConfiguration":{"shape":"AutoRollbackConfig"}
},
"documentation":"<p>The deployment configuration for an endpoint that hosts inference components. The configuration includes the desired deployment strategy and rollback settings.</p>"
},
"InferenceComponentName":{
"type":"string",
"max":63,
Expand All @@ -22739,6 +22780,32 @@
"max":63,
"pattern":"[a-zA-Z0-9-]+"
},
"InferenceComponentRollingUpdatePolicy":{
"type":"structure",
"required":[
"MaximumBatchSize",
"WaitIntervalInSeconds"
],
"members":{
"MaximumBatchSize":{
"shape":"InferenceComponentCapacitySize",
"documentation":"<p>The batch size for each rolling step in the deployment process. For each step, SageMaker AI provisions capacity on the new endpoint fleet, routes traffic to that fleet, and terminates capacity on the old endpoint fleet. The value must be between 5% and 50% of the copy count of the inference component.</p>"
},
"WaitIntervalInSeconds":{
"shape":"WaitIntervalInSeconds",
"documentation":"<p>The length of the baking period, during which SageMaker AI monitors alarms for each batch on the new fleet.</p>"
},
"MaximumExecutionTimeoutInSeconds":{
"shape":"MaximumExecutionTimeoutInSeconds",
"documentation":"<p>The time limit for the total deployment. Exceeding this limit causes a timeout.</p>"
},
"RollbackMaximumBatchSize":{
"shape":"InferenceComponentCapacitySize",
"documentation":"<p>The batch size for a rollback to the old endpoint fleet. If this field is absent, the value is set to the default, which is 100% of the total capacity. When the default is used, SageMaker AI provisions the entire capacity of the old fleet at once during rollback.</p>"
}
},
"documentation":"<p>Specifies a rolling deployment strategy for updating a SageMaker AI inference component.</p>"
},
"InferenceComponentRuntimeConfig":{
"type":"structure",
"required":["CopyCount"],
Expand Down Expand Up @@ -29780,7 +29847,7 @@
"members":{
"ModelPackageName":{
"shape":"EntityName",
"documentation":"<p>The name of the model.</p>"
"documentation":"<p>The name of the model package. The name can be as follows:</p> <ul> <li> <p>For a versioned model, the name is automatically generated by SageMaker Model Registry and follows the format '<code>ModelPackageGroupName/ModelPackageVersion</code>'.</p> </li> <li> <p>For an unversioned model, you must provide the name.</p> </li> </ul>"
},
"ModelPackageGroupName":{
"shape":"EntityName",
Expand Down Expand Up @@ -29897,7 +29964,7 @@
"documentation":"<p>Indicates if you want to skip model validation.</p>"
}
},
"documentation":"<p>A versioned model that can be deployed for SageMaker inference.</p>"
"documentation":"<p>A container for your trained model that can be deployed for SageMaker inference. This can include inference code, artifacts, and metadata. The model package type can be one of the following.</p> <ul> <li> <p>Versioned model: A part of a model package group in Model Registry.</p> </li> <li> <p>Unversioned model: Not part of a model package group and used in Amazon Web Services Marketplace.</p> </li> </ul> <p>For more information, see <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModelPackage.html\"> <code>CreateModelPackage</code> </a>.</p>"
},
"ModelPackageArn":{
"type":"string",
Expand Down Expand Up @@ -30011,7 +30078,7 @@
"documentation":"<p>A list of the tags associated with the model group. For more information, see <a href=\"https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html\">Tagging Amazon Web Services resources</a> in the <i>Amazon Web Services General Reference Guide</i>.</p>"
}
},
"documentation":"<p>A group of versioned models in the model registry.</p>"
"documentation":"<p>A group of versioned models in the Model Registry.</p>"
},
"ModelPackageGroupArn":{
"type":"string",
Expand Down Expand Up @@ -40889,6 +40956,10 @@
"RuntimeConfig":{
"shape":"InferenceComponentRuntimeConfig",
"documentation":"<p>Runtime settings for a model that is deployed with an inference component.</p>"
},
"DeploymentConfig":{
"shape":"InferenceComponentDeploymentConfig",
"documentation":"<p>The deployment configuration for the inference component. The configuration contains the desired deployment strategy and rollback settings.</p>"
}
}
},
Expand Down
53 changes: 53 additions & 0 deletions src/sagemaker_core/main/code_injection/shape_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4911,6 +4911,11 @@
"shape": "InferenceComponentStatus",
"type": "string",
},
{
"name": "LastDeploymentConfig",
"shape": "InferenceComponentDeploymentConfig",
"type": "structure",
},
],
"type": "structure",
},
Expand Down Expand Up @@ -7670,6 +7675,13 @@
],
"type": "structure",
},
"InferenceComponentCapacitySize": {
"members": [
{"name": "Type", "shape": "InferenceComponentCapacitySizeType", "type": "string"},
{"name": "Value", "shape": "CapacitySizeValue", "type": "integer"},
],
"type": "structure",
},
"InferenceComponentComputeResourceRequirements": {
"members": [
{"name": "NumberOfCpuCoresRequired", "shape": "NumberOfCpuCores", "type": "float"},
Expand Down Expand Up @@ -7699,6 +7711,42 @@
],
"type": "structure",
},
"InferenceComponentDeploymentConfig": {
"members": [
{
"name": "RollingUpdatePolicy",
"shape": "InferenceComponentRollingUpdatePolicy",
"type": "structure",
},
{
"name": "AutoRollbackConfiguration",
"shape": "AutoRollbackConfig",
"type": "structure",
},
],
"type": "structure",
},
"InferenceComponentRollingUpdatePolicy": {
"members": [
{
"name": "MaximumBatchSize",
"shape": "InferenceComponentCapacitySize",
"type": "structure",
},
{"name": "WaitIntervalInSeconds", "shape": "WaitIntervalInSeconds", "type": "integer"},
{
"name": "MaximumExecutionTimeoutInSeconds",
"shape": "MaximumExecutionTimeoutInSeconds",
"type": "integer",
},
{
"name": "RollbackMaximumBatchSize",
"shape": "InferenceComponentCapacitySize",
"type": "structure",
},
],
"type": "structure",
},
"InferenceComponentRuntimeConfig": {
"members": [
{"name": "CopyCount", "shape": "InferenceComponentCopyCount", "type": "integer"}
Expand Down Expand Up @@ -15004,6 +15052,11 @@
"shape": "InferenceComponentRuntimeConfig",
"type": "structure",
},
{
"name": "DeploymentConfig",
"shape": "InferenceComponentDeploymentConfig",
"type": "structure",
},
],
"type": "structure",
},
Expand Down
9 changes: 8 additions & 1 deletion src/sagemaker_core/main/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3874,7 +3874,7 @@ def batch_delete_nodes(
Deletes specific nodes within a SageMaker HyperPod cluster.

Parameters:
node_ids: A list of node IDs to be deleted from the specified cluster. For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.
node_ids: A list of node IDs to be deleted from the specified cluster. For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes. If you need to delete more than 99 instances, contact Support for assistance.
session: Boto3 session.
region: Region name.

Expand Down Expand Up @@ -14801,6 +14801,7 @@ class InferenceComponent(Base):
specification: Details about the resources that are deployed with this inference component.
runtime_config: Details about the runtime settings for the model that is deployed with the inference component.
inference_component_status: The status of the inference component.
last_deployment_config: The deployment and rollback settings that you assigned to the inference component.

"""

Expand All @@ -14815,6 +14816,7 @@ class InferenceComponent(Base):
creation_time: Optional[datetime.datetime] = Unassigned()
last_modified_time: Optional[datetime.datetime] = Unassigned()
inference_component_status: Optional[str] = Unassigned()
last_deployment_config: Optional[InferenceComponentDeploymentConfig] = Unassigned()

def get_name(self) -> str:
attributes = vars(self)
Expand Down Expand Up @@ -14999,10 +15001,14 @@ def update(
self,
specification: Optional[InferenceComponentSpecification] = Unassigned(),
runtime_config: Optional[InferenceComponentRuntimeConfig] = Unassigned(),
deployment_config: Optional[InferenceComponentDeploymentConfig] = Unassigned(),
) -> Optional["InferenceComponent"]:
"""
Update a InferenceComponent resource

Parameters:
deployment_config: The deployment configuration for the inference component. The configuration contains the desired deployment strategy and rollback settings.

Returns:
The InferenceComponent resource.

Expand All @@ -15026,6 +15032,7 @@ def update(
"InferenceComponentName": self.inference_component_name,
"Specification": specification,
"RuntimeConfig": runtime_config,
"DeploymentConfig": deployment_config,
}
logger.debug(f"Input request: {operation_input_args}")
# serialize the input request
Expand Down
57 changes: 53 additions & 4 deletions src/sagemaker_core/main/shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8404,6 +8404,55 @@ class InferenceComponentRuntimeConfigSummary(Base):
current_copy_count: Optional[int] = Unassigned()


class InferenceComponentCapacitySize(Base):
"""
InferenceComponentCapacitySize
Specifies the type and size of the endpoint capacity to activate for a rolling deployment or a rollback strategy. You can specify your batches as either of the following: a count of inference component copies, or the overall percentage of your fleet. For a rollback strategy, if you don't specify the fields in this object, or if you set the Value parameter to 100%, then SageMaker AI uses a blue/green rollback strategy and rolls all traffic back to the blue fleet.

Attributes
----------------------
type: Specifies the endpoint capacity type. One of COPY_COUNT (the endpoint activates based on the number of inference component copies) or CAPACITY_PERCENT (the endpoint activates based on the specified percentage of capacity).
value: Defines the capacity size, either as a number of inference component copies or a capacity percentage.
"""

type: str
value: int


class InferenceComponentRollingUpdatePolicy(Base):
"""
InferenceComponentRollingUpdatePolicy
Specifies a rolling deployment strategy for updating a SageMaker AI inference component.

Attributes
----------------------
maximum_batch_size: The batch size for each rolling step in the deployment process. For each step, SageMaker AI provisions capacity on the new endpoint fleet, routes traffic to that fleet, and terminates capacity on the old endpoint fleet. The value must be between 5% and 50% of the copy count of the inference component.
wait_interval_in_seconds: The length of the baking period, during which SageMaker AI monitors alarms for each batch on the new fleet.
maximum_execution_timeout_in_seconds: (Optional) The time limit for the total deployment. Exceeding this limit causes a timeout.
rollback_maximum_batch_size: (Optional) The batch size for a rollback to the old endpoint fleet. If this field is absent, the value is set to the default, which is 100% of the total capacity. When the default is used, SageMaker AI provisions the entire capacity of the old fleet at once during rollback.
"""

maximum_batch_size: InferenceComponentCapacitySize
wait_interval_in_seconds: int
maximum_execution_timeout_in_seconds: Optional[int] = Unassigned()
rollback_maximum_batch_size: Optional[InferenceComponentCapacitySize] = Unassigned()


class InferenceComponentDeploymentConfig(Base):
"""
InferenceComponentDeploymentConfig
The deployment configuration for an endpoint that hosts inference components. The configuration includes the desired deployment strategy and rollback settings.

Attributes
----------------------
rolling_update_policy: Specifies a rolling deployment strategy for updating a SageMaker AI endpoint.
auto_rollback_configuration: (Optional) An AutoRollbackConfig value; no description is provided for this member. NOTE(review): presumably the rollback settings referred to in the summary above -- confirm against AutoRollbackConfig.
"""

rolling_update_policy: InferenceComponentRollingUpdatePolicy
auto_rollback_configuration: Optional[AutoRollbackConfig] = Unassigned()


class EndpointMetadata(Base):
"""
EndpointMetadata
Expand Down Expand Up @@ -9638,7 +9687,7 @@ class EndpointSummary(Base):
class Experiment(Base):
"""
Experiment
The properties of an experiment as returned by the Search API.
The properties of an experiment as returned by the Search API. For information about experiments, see the CreateExperiment API.

Attributes
----------------------
Expand Down Expand Up @@ -11690,11 +11739,11 @@ class ModelDashboardModel(Base):
class ModelPackage(Base):
"""
ModelPackage
A versioned model that can be deployed for SageMaker inference.
A container for your trained model that can be deployed for SageMaker inference. This can include inference code, artifacts, and metadata. The model package type can be one of the following. Versioned model: A part of a model package group in Model Registry. Unversioned model: Not part of a model package group and used in Amazon Web Services Marketplace. For more information, see CreateModelPackage .

Attributes
----------------------
model_package_name: The name of the model.
model_package_name: The name of the model package. The name can be as follows: For a versioned model, the name is automatically generated by SageMaker Model Registry and follows the format 'ModelPackageGroupName/ModelPackageVersion'. For an unversioned model, you must provide the name.
model_package_group_name: The model group to which the model belongs.
model_package_version: The version number of a versioned model.
model_package_arn: The Amazon Resource Name (ARN) of the model package.
Expand Down Expand Up @@ -11765,7 +11814,7 @@ class ModelPackage(Base):
class ModelPackageGroup(Base):
"""
ModelPackageGroup
A group of versioned models in the model registry.
A group of versioned models in the Model Registry.

Attributes
----------------------
Expand Down