aws · sagemaker-bot · Feb 27, 2025 · Feb 27, 2025
@@ -6513,7 +6513,7 @@
     "BatchDeleteClusterNodesErrorList":{
       "type":"list",
       "member":{"shape":"BatchDeleteClusterNodesError"},
-      "max":99,
+      "max":3000,
       "min":1
     },
     "BatchDeleteClusterNodesRequest":{
@@ -6529,7 +6529,7 @@
         },
         "NodeIds":{
           "shape":"ClusterNodeIds",
-          "documentation":"<p>A list of node IDs to be deleted from the specified cluster.</p> <note> <p>For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.</p> </note>"
+          "documentation":"<p>A list of node IDs to be deleted from the specified cluster.</p> <note> <ul> <li> <p>For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.</p> </li> <li> <p>If you need to delete more than 99 instances, contact <a href=\"http://aws.amazon.com/contact-us/\">Support</a> for assistance.</p> </li> </ul> </note>"
         }
       }
     },
@@ -7986,7 +7986,7 @@
     "ClusterNodeIds":{
       "type":"list",
       "member":{"shape":"ClusterNodeId"},
-      "max":99,
+      "max":3000,
       "min":1
     },
     "ClusterNodeRecovery":{
@@ -15959,6 +15959,10 @@
         "InferenceComponentStatus":{
           "shape":"InferenceComponentStatus",
           "documentation":"<p>The status of the inference component.</p>"
+        },
+        "LastDeploymentConfig":{
+          "shape":"InferenceComponentDeploymentConfig",
+          "documentation":"<p>The deployment and rollback settings that you assigned to the inference component.</p>"
         }
       }
     },
@@ -19977,7 +19981,7 @@
           "documentation":"<p>The list of tags that are associated with the experiment. You can use <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API to search on the tags.</p>"
         }
       },
-      "documentation":"<p>The properties of an experiment as returned by the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API.</p>"
+      "documentation":"<p>The properties of an experiment as returned by the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_Search.html\">Search</a> API. For information about experiments, see the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateExperiment.html\">CreateExperiment</a> API.</p>"
     },
     "ExperimentArn":{
       "type":"string",
@@ -22669,6 +22673,31 @@
       "max":2048,
       "min":20
     },
+    "InferenceComponentCapacitySize":{
+      "type":"structure",
+      "required":[
+        "Type",
+        "Value"
+      ],
+      "members":{
+        "Type":{
+          "shape":"InferenceComponentCapacitySizeType",
+          "documentation":"<p>Specifies the endpoint capacity type.</p> <dl> <dt>COPY_COUNT</dt> <dd> <p>The endpoint activates based on the number of inference component copies.</p> </dd> <dt>CAPACITY_PERCENT</dt> <dd> <p>The endpoint activates based on the specified percentage of capacity.</p> </dd> </dl>"
+        },
+        "Value":{
+          "shape":"CapacitySizeValue",
+          "documentation":"<p>Defines the capacity size, either as a number of inference component copies or a capacity percentage.</p>"
+        }
+      },
+      "documentation":"<p>Specifies the type and size of the endpoint capacity to activate for a rolling deployment or a rollback strategy. You can specify your batches as either of the following:</p> <ul> <li> <p>A count of inference component copies </p> </li> <li> <p>The overall percentage of your fleet </p> </li> </ul> <p>For a rollback strategy, if you don't specify the fields in this object, or if you set the <code>Value</code> parameter to 100%, then SageMaker AI uses a blue/green rollback strategy and rolls all traffic back to the blue fleet.</p>"
+    },
+    "InferenceComponentCapacitySizeType":{
+      "type":"string",
+      "enum":[
+        "COPY_COUNT",
+        "CAPACITY_PERCENT"
+      ]
+    },
     "InferenceComponentComputeResourceRequirements":{
       "type":"structure",
       "required":["MinMemoryRequiredInMb"],
@@ -22729,6 +22758,18 @@
       "type":"integer",
       "min":0
     },
+    "InferenceComponentDeploymentConfig":{
+      "type":"structure",
+      "required":["RollingUpdatePolicy"],
+      "members":{
+        "RollingUpdatePolicy":{
+          "shape":"InferenceComponentRollingUpdatePolicy",
+          "documentation":"<p>Specifies a rolling deployment strategy for updating a SageMaker AI endpoint.</p>"
+        },
+        "AutoRollbackConfiguration":{"shape":"AutoRollbackConfig"}
+      },
+      "documentation":"<p>The deployment configuration for an endpoint that hosts inference components. The configuration includes the desired deployment strategy and rollback settings.</p>"
+    },
     "InferenceComponentName":{
       "type":"string",
       "max":63,
@@ -22739,6 +22780,32 @@
       "max":63,
       "pattern":"[a-zA-Z0-9-]+"
     },
+    "InferenceComponentRollingUpdatePolicy":{
+      "type":"structure",
+      "required":[
+        "MaximumBatchSize",
+        "WaitIntervalInSeconds"
+      ],
+      "members":{
+        "MaximumBatchSize":{
+          "shape":"InferenceComponentCapacitySize",
+          "documentation":"<p>The batch size for each rolling step in the deployment process. For each step, SageMaker AI provisions capacity on the new endpoint fleet, routes traffic to that fleet, and terminates capacity on the old endpoint fleet. The value must be between 5% and 50% of the copy count of the inference component.</p>"
+        },
+        "WaitIntervalInSeconds":{
+          "shape":"WaitIntervalInSeconds",
+          "documentation":"<p>The length of the baking period, during which SageMaker AI monitors alarms for each batch on the new fleet.</p>"
+        },
+        "MaximumExecutionTimeoutInSeconds":{
+          "shape":"MaximumExecutionTimeoutInSeconds",
+          "documentation":"<p>The time limit for the total deployment. Exceeding this limit causes a timeout.</p>"
+        },
+        "RollbackMaximumBatchSize":{
+          "shape":"InferenceComponentCapacitySize",
+          "documentation":"<p>The batch size for a rollback to the old endpoint fleet. If this field is absent, the value is set to the default, which is 100% of the total capacity. When the default is used, SageMaker AI provisions the entire capacity of the old fleet at once during rollback.</p>"
+        }
+      },
+      "documentation":"<p>Specifies a rolling deployment strategy for updating a SageMaker AI inference component.</p>"
+    },
     "InferenceComponentRuntimeConfig":{
       "type":"structure",
       "required":["CopyCount"],
@@ -29780,7 +29847,7 @@
       "members":{
         "ModelPackageName":{
           "shape":"EntityName",
-          "documentation":"<p>The name of the model.</p>"
+          "documentation":"<p>The name of the model package. The name can be as follows:</p> <ul> <li> <p>For a versioned model, the name is automatically generated by SageMaker Model Registry and follows the format '<code>ModelPackageGroupName/ModelPackageVersion</code>'.</p> </li> <li> <p>For an unversioned model, you must provide the name.</p> </li> </ul>"
         },
         "ModelPackageGroupName":{
           "shape":"EntityName",
@@ -29897,7 +29964,7 @@
           "documentation":"<p>Indicates if you want to skip model validation.</p>"
         }
       },
-      "documentation":"<p>A versioned model that can be deployed for SageMaker inference.</p>"
+      "documentation":"<p>A container for your trained model that can be deployed for SageMaker inference. This can include inference code, artifacts, and metadata. The model package type can be one of the following.</p> <ul> <li> <p>Versioned model: A part of a model package group in Model Registry.</p> </li> <li> <p>Unversioned model: Not part of a model package group and used in Amazon Web Services Marketplace.</p> </li> </ul> <p>For more information, see <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModelPackage.html\"> <code>CreateModelPackage</code> </a>.</p>"
     },
     "ModelPackageArn":{
       "type":"string",
@@ -30011,7 +30078,7 @@
           "documentation":"<p>A list of the tags associated with the model group. For more information, see <a href=\"https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html\">Tagging Amazon Web Services resources</a> in the <i>Amazon Web Services General Reference Guide</i>.</p>"
         }
       },
-      "documentation":"<p>A group of versioned models in the model registry.</p>"
+      "documentation":"<p>A group of versioned models in the Model Registry.</p>"
     },
     "ModelPackageGroupArn":{
       "type":"string",
@@ -40889,6 +40956,10 @@
         "RuntimeConfig":{
           "shape":"InferenceComponentRuntimeConfig",
           "documentation":"<p>Runtime settings for a model that is deployed with an inference component.</p>"
+        },
+        "DeploymentConfig":{
+          "shape":"InferenceComponentDeploymentConfig",
+          "documentation":"<p>The deployment configuration for the inference component. The configuration contains the desired deployment strategy and rollback settings.</p>"
         }
       }
     },

@@ -4911,6 +4911,11 @@
                 "shape": "InferenceComponentStatus",
                 "type": "string",
             },
+            {
+                "name": "LastDeploymentConfig",
+                "shape": "InferenceComponentDeploymentConfig",
+                "type": "structure",
+            },
         ],
         "type": "structure",
     },
@@ -7670,6 +7675,13 @@
         ],
         "type": "structure",
     },
+    "InferenceComponentCapacitySize": {
+        "members": [
+            {"name": "Type", "shape": "InferenceComponentCapacitySizeType", "type": "string"},
+            {"name": "Value", "shape": "CapacitySizeValue", "type": "integer"},
+        ],
+        "type": "structure",
+    },
     "InferenceComponentComputeResourceRequirements": {
         "members": [
             {"name": "NumberOfCpuCoresRequired", "shape": "NumberOfCpuCores", "type": "float"},
@@ -7699,6 +7711,42 @@
         ],
         "type": "structure",
     },
+    "InferenceComponentDeploymentConfig": {
+        "members": [
+            {
+                "name": "RollingUpdatePolicy",
+                "shape": "InferenceComponentRollingUpdatePolicy",
+                "type": "structure",
+            },
+            {
+                "name": "AutoRollbackConfiguration",
+                "shape": "AutoRollbackConfig",
+                "type": "structure",
+            },
+        ],
+        "type": "structure",
+    },
+    "InferenceComponentRollingUpdatePolicy": {
+        "members": [
+            {
+                "name": "MaximumBatchSize",
+                "shape": "InferenceComponentCapacitySize",
+                "type": "structure",
+            },
+            {"name": "WaitIntervalInSeconds", "shape": "WaitIntervalInSeconds", "type": "integer"},
+            {
+                "name": "MaximumExecutionTimeoutInSeconds",
+                "shape": "MaximumExecutionTimeoutInSeconds",
+                "type": "integer",
+            },
+            {
+                "name": "RollbackMaximumBatchSize",
+                "shape": "InferenceComponentCapacitySize",
+                "type": "structure",
+            },
+        ],
+        "type": "structure",
+    },
     "InferenceComponentRuntimeConfig": {
         "members": [
             {"name": "CopyCount", "shape": "InferenceComponentCopyCount", "type": "integer"}
@@ -15004,6 +15052,11 @@
                 "shape": "InferenceComponentRuntimeConfig",
                 "type": "structure",
             },
+            {
+                "name": "DeploymentConfig",
+                "shape": "InferenceComponentDeploymentConfig",
+                "type": "structure",
+            },
         ],
         "type": "structure",
     },

@@ -3874,7 +3874,7 @@ def batch_delete_nodes(
         Deletes specific nodes within a SageMaker HyperPod cluster.
 
         Parameters:
-            node_ids: A list of node IDs to be deleted from the specified cluster.  For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes.
+            node_ids: A list of node IDs to be deleted from the specified cluster. For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes. If you need to delete more than 99 instances, contact Support for assistance.
             session: Boto3 session.
             region: Region name.
 
@@ -14801,6 +14801,7 @@ class InferenceComponent(Base):
         specification: Details about the resources that are deployed with this inference component.
         runtime_config: Details about the runtime settings for the model that is deployed with the inference component.
         inference_component_status: The status of the inference component.
+        last_deployment_config: The deployment and rollback settings that you assigned to the inference component.
 
     """
 
@@ -14815,6 +14816,7 @@ class InferenceComponent(Base):
     creation_time: Optional[datetime.datetime] = Unassigned()
     last_modified_time: Optional[datetime.datetime] = Unassigned()
     inference_component_status: Optional[str] = Unassigned()
+    last_deployment_config: Optional[InferenceComponentDeploymentConfig] = Unassigned()
 
     def get_name(self) -> str:
         attributes = vars(self)
@@ -14999,10 +15001,14 @@ def update(
         self,
         specification: Optional[InferenceComponentSpecification] = Unassigned(),
         runtime_config: Optional[InferenceComponentRuntimeConfig] = Unassigned(),
+        deployment_config: Optional[InferenceComponentDeploymentConfig] = Unassigned(),
     ) -> Optional["InferenceComponent"]:
         """
         Update a InferenceComponent resource
 
+        Parameters:
+            deployment_config: The deployment configuration for the inference component. The configuration contains the desired deployment strategy and rollback settings.
+
         Returns:
             The InferenceComponent resource.
 
@@ -15026,6 +15032,7 @@ def update(
             "InferenceComponentName": self.inference_component_name,
             "Specification": specification,
             "RuntimeConfig": runtime_config,
+            "DeploymentConfig": deployment_config,
         }
         logger.debug(f"Input request: {operation_input_args}")
         # serialize the input request

@@ -8404,6 +8404,55 @@ class InferenceComponentRuntimeConfigSummary(Base):
     current_copy_count: Optional[int] = Unassigned()
 
 
+class InferenceComponentCapacitySize(Base):
+    """
+    InferenceComponentCapacitySize
+      Specifies the type and size of the endpoint capacity to activate for a rolling deployment or a rollback strategy. You can specify your batches as either of the following:   A count of inference component copies    The overall percentage of your fleet    For a rollback strategy, if you don't specify the fields in this object, or if you set the Value parameter to 100%, then SageMaker AI uses a blue/green rollback strategy and rolls all traffic back to the blue fleet.
+
+    Attributes
+    ----------------------
+    type: Specifies the endpoint capacity type.  COPY_COUNT  The endpoint activates based on the number of inference component copies.  CAPACITY_PERCENT  The endpoint activates based on the specified percentage of capacity.
+    value: Defines the capacity size, either as a number of inference component copies or a capacity percentage.
+    """
+
+    type: str
+    value: int
+
+
+class InferenceComponentRollingUpdatePolicy(Base):
+    """
+    InferenceComponentRollingUpdatePolicy
+      Specifies a rolling deployment strategy for updating a SageMaker AI inference component.
+
+    Attributes
+    ----------------------
+    maximum_batch_size: The batch size for each rolling step in the deployment process. For each step, SageMaker AI provisions capacity on the new endpoint fleet, routes traffic to that fleet, and terminates capacity on the old endpoint fleet. The value must be between 5% and 50% of the copy count of the inference component.
+    wait_interval_in_seconds: The length of the baking period, during which SageMaker AI monitors alarms for each batch on the new fleet.
+    maximum_execution_timeout_in_seconds: The time limit for the total deployment. Exceeding this limit causes a timeout.
+    rollback_maximum_batch_size: The batch size for a rollback to the old endpoint fleet. If this field is absent, the value is set to the default, which is 100% of the total capacity. When the default is used, SageMaker AI provisions the entire capacity of the old fleet at once during rollback.
+    """
+
+    maximum_batch_size: InferenceComponentCapacitySize
+    wait_interval_in_seconds: int
+    maximum_execution_timeout_in_seconds: Optional[int] = Unassigned()
+    rollback_maximum_batch_size: Optional[InferenceComponentCapacitySize] = Unassigned()
+
+
+class InferenceComponentDeploymentConfig(Base):
+    """
+    InferenceComponentDeploymentConfig
+      The deployment configuration for an endpoint that hosts inference components. The configuration includes the desired deployment strategy and rollback settings.
+
+    Attributes
+    ----------------------
+    rolling_update_policy: Specifies a rolling deployment strategy for updating a SageMaker AI endpoint.
+    auto_rollback_configuration
+    """
+
+    rolling_update_policy: InferenceComponentRollingUpdatePolicy
+    auto_rollback_configuration: Optional[AutoRollbackConfig] = Unassigned()
+
+
 class EndpointMetadata(Base):
     """
     EndpointMetadata
@@ -9638,7 +9687,7 @@ class EndpointSummary(Base):
 class Experiment(Base):
     """
     Experiment
-      The properties of an experiment as returned by the Search API.
+      The properties of an experiment as returned by the Search API. For information about experiments, see the CreateExperiment API.
 
     Attributes
     ----------------------
@@ -11690,11 +11739,11 @@ class ModelDashboardModel(Base):
 class ModelPackage(Base):
     """
     ModelPackage
-      A versioned model that can be deployed for SageMaker inference.
+      A container for your trained model that can be deployed for SageMaker inference. This can include inference code, artifacts, and metadata. The model package type can be one of the following.   Versioned model: A part of a model package group in Model Registry.   Unversioned model: Not part of a model package group and used in Amazon Web Services Marketplace.   For more information, see CreateModelPackage.
 
     Attributes
     ----------------------
-    model_package_name: The name of the model.
+    model_package_name: The name of the model package. The name can be as follows:   For a versioned model, the name is automatically generated by SageMaker Model Registry and follows the format 'ModelPackageGroupName/ModelPackageVersion'.   For an unversioned model, you must provide the name.
     model_package_group_name: The model group to which the model belongs.
     model_package_version: The version number of a versioned model.
     model_package_arn: The Amazon Resource Name (ARN) of the model package.
@@ -11765,7 +11814,7 @@ class ModelPackage(Base):
 class ModelPackageGroup(Base):
     """
     ModelPackageGroup
-      A group of versioned models in the model registry.
+      A group of versioned models in the Model Registry.
 
     Attributes
     ----------------------