diff --git a/sample/sagemaker/2017-07-24/service-2.json b/sample/sagemaker/2017-07-24/service-2.json index fdf9dfb..553dfa8 100644 --- a/sample/sagemaker/2017-07-24/service-2.json +++ b/sample/sagemaker/2017-07-24/service-2.json @@ -4737,6 +4737,20 @@ "Disabled" ] }, + "ActiveClusterOperationCount":{ + "type":"integer", + "box":true, + "min":1 + }, + "ActiveClusterOperationName":{ + "type":"string", + "enum":["Scaling"] + }, + "ActiveOperations":{ + "type":"map", + "key":{"shape":"ActiveClusterOperationName"}, + "value":{"shape":"ActiveClusterOperationCount"} + }, "AddAssociationRequest":{ "type":"structure", "required":[ @@ -8715,6 +8729,27 @@ "type":"string", "pattern":"[a-z]{3}\\d-az\\d" }, + "ClusterCapacityRequirements":{ + "type":"structure", + "members":{ + "Spot":{ + "shape":"ClusterSpotOptions", + "documentation":"

Configuration options specific to Spot instances.

" + }, + "OnDemand":{ + "shape":"ClusterOnDemandOptions", + "documentation":"

Configuration options specific to On-Demand instances.

" + } + }, + "documentation":"

Defines the instance capacity requirements for an instance group, including configurations for both Spot and On-Demand capacity types.

" + }, + "ClusterCapacityType":{ + "type":"string", + "enum":[ + "Spot", + "OnDemand" + ] + }, "ClusterConfigMode":{ "type":"string", "enum":[ @@ -8878,6 +8913,10 @@ "shape":"ClusterInstanceCount", "documentation":"

The number of instances you specified to add to the instance group of a SageMaker HyperPod cluster.

" }, + "MinCount":{ + "shape":"ClusterInstanceCount", + "documentation":"

The minimum number of instances that must be available in the instance group of a SageMaker HyperPod cluster before it transitions to InService status.

" + }, "InstanceGroupName":{ "shape":"ClusterInstanceGroupName", "documentation":"

The name of the instance group of a SageMaker HyperPod cluster.

" @@ -8934,6 +8973,18 @@ "shape":"ImageId", "documentation":"

The ID of the Amazon Machine Image (AMI) desired for the instance group.

" }, + "ActiveOperations":{ + "shape":"ActiveOperations", + "documentation":"

A map indicating active operations currently in progress for the instance group of a SageMaker HyperPod cluster. When there is a scaling operation in progress, this map contains a key Scaling with value 1.

" + }, + "KubernetesConfig":{ + "shape":"ClusterKubernetesConfigDetails", + "documentation":"

The Kubernetes configuration for the instance group that contains labels and taints to be applied for the nodes in this instance group.

" + }, + "CapacityRequirements":{ + "shape":"ClusterCapacityRequirements", + "documentation":"

The instance capacity requirements for the instance group.

" + }, "TargetStateCount":{ "shape":"ClusterInstanceCount", "documentation":"

The number of nodes running a specific image ID since the last software update request.

" @@ -8970,6 +9021,10 @@ "shape":"ClusterInstanceCount", "documentation":"

Specifies the number of instances to add to the instance group of a SageMaker HyperPod cluster.

" }, + "MinInstanceCount":{ + "shape":"ClusterInstanceCount", + "documentation":"

Defines the minimum number of instances required for an instance group to become InService. If this threshold isn't met within 3 hours, the instance group rolls back to its previous state - zero instances for new instance groups, or previous settings for existing instance groups. MinInstanceCount only affects the initial transition to InService and does not guarantee maintaining this minimum afterward.

" + }, "InstanceGroupName":{ "shape":"ClusterInstanceGroupName", "documentation":"

Specifies the name of the instance group.

" @@ -9013,6 +9068,14 @@ "ImageId":{ "shape":"ImageId", "documentation":"

When configuring your HyperPod cluster, you can specify an image ID using one of the following options:

If you choose to use a custom AMI (CustomAmiId), ensure it meets the following requirements:

When updating the instance group's AMI through the UpdateClusterSoftware operation, if an instance group uses a custom AMI, you must provide an ImageId or use the default as input. Note that if you don't specify an instance group in your UpdateClusterSoftware request, then all of the instance groups are patched with the specified image.

" + }, + "KubernetesConfig":{ + "shape":"ClusterKubernetesConfig", + "documentation":"

Specifies the Kubernetes configuration for the instance group. You describe what you want the labels and taints to look like, and the cluster works to reconcile the actual state with the declared state for nodes in this instance group.

" + }, + "CapacityRequirements":{ + "shape":"ClusterCapacityRequirements", + "documentation":"

Specifies the capacity requirements for the instance group.

" } }, "documentation":"

The specifications of an instance group that you need to define.

" @@ -9212,6 +9275,131 @@ "ml.r7i.48xlarge" ] }, + "ClusterKubernetesConfig":{ + "type":"structure", + "members":{ + "Labels":{ + "shape":"ClusterKubernetesLabels", + "documentation":"

Key-value pairs of labels to be applied to cluster nodes.

" + }, + "Taints":{ + "shape":"ClusterKubernetesTaints", + "documentation":"

List of taints to be applied to cluster nodes.

" + } + }, + "documentation":"

Kubernetes configuration that specifies labels and taints to be applied to cluster nodes in an instance group.

" + }, + "ClusterKubernetesConfigDetails":{ + "type":"structure", + "members":{ + "CurrentLabels":{ + "shape":"ClusterKubernetesLabels", + "documentation":"

The current labels applied to cluster nodes of an instance group.

" + }, + "DesiredLabels":{ + "shape":"ClusterKubernetesLabels", + "documentation":"

The desired labels to be applied to cluster nodes of an instance group.

" + }, + "CurrentTaints":{ + "shape":"ClusterKubernetesTaints", + "documentation":"

The current taints applied to cluster nodes of an instance group.

" + }, + "DesiredTaints":{ + "shape":"ClusterKubernetesTaints", + "documentation":"

The desired taints to be applied to cluster nodes of an instance group.

" + } + }, + "documentation":"

Detailed Kubernetes configuration showing both the current and desired state of labels and taints for cluster nodes.

" + }, + "ClusterKubernetesConfigNodeDetails":{ + "type":"structure", + "members":{ + "CurrentLabels":{ + "shape":"ClusterKubernetesLabels", + "documentation":"

The current labels applied to the cluster node.

" + }, + "DesiredLabels":{ + "shape":"ClusterKubernetesLabels", + "documentation":"

The desired labels to be applied to the cluster node.

" + }, + "CurrentTaints":{ + "shape":"ClusterKubernetesTaints", + "documentation":"

The current taints applied to the cluster node.

" + }, + "DesiredTaints":{ + "shape":"ClusterKubernetesTaints", + "documentation":"

The desired taints to be applied to the cluster node.

" + } + }, + "documentation":"

Node-specific Kubernetes configuration showing both current and desired state of labels and taints for an individual cluster node.

" + }, + "ClusterKubernetesLabelKey":{ + "type":"string", + "max":317, + "min":1, + "pattern":"([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?[A-Za-z0-9]([-A-Za-z0-9_.]*[A-Za-z0-9])?" + }, + "ClusterKubernetesLabelValue":{ + "type":"string", + "max":63, + "min":1, + "pattern":"(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?" + }, + "ClusterKubernetesLabels":{ + "type":"map", + "key":{"shape":"ClusterKubernetesLabelKey"}, + "value":{"shape":"ClusterKubernetesLabelValue"}, + "max":50, + "min":0 + }, + "ClusterKubernetesTaint":{ + "type":"structure", + "required":[ + "Key", + "Effect" + ], + "members":{ + "Key":{ + "shape":"ClusterKubernetesTaintKey", + "documentation":"

The key of the taint.

" + }, + "Value":{ + "shape":"ClusterKubernetesTaintValue", + "documentation":"

The value of the taint.

" + }, + "Effect":{ + "shape":"ClusterKubernetesTaintEffect", + "documentation":"

The effect of the taint. Valid values are NoSchedule, PreferNoSchedule, and NoExecute.

" + } + }, + "documentation":"

A Kubernetes taint that can be applied to cluster nodes.

" + }, + "ClusterKubernetesTaintEffect":{ + "type":"string", + "enum":[ + "NoSchedule", + "PreferNoSchedule", + "NoExecute" + ] + }, + "ClusterKubernetesTaintKey":{ + "type":"string", + "max":317, + "min":1, + "pattern":"([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?[A-Za-z0-9]([-A-Za-z0-9_.]*[A-Za-z0-9])?" + }, + "ClusterKubernetesTaintValue":{ + "type":"string", + "max":63, + "min":1, + "pattern":"(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?" + }, + "ClusterKubernetesTaints":{ + "type":"list", + "member":{"shape":"ClusterKubernetesTaint"}, + "max":50, + "min":0 + }, "ClusterLifeCycleConfig":{ "type":"structure", "required":[ @@ -9340,6 +9528,14 @@ "UltraServerInfo":{ "shape":"UltraServerInfo", "documentation":"

Contains information about the UltraServer.

" + }, + "KubernetesConfig":{ + "shape":"ClusterKubernetesConfigNodeDetails", + "documentation":"

The Kubernetes configuration applied to this node, showing both the current and desired state of labels and taints. The cluster works to reconcile the actual state with the declared state.

" + }, + "CapacityType":{ + "shape":"ClusterCapacityType", + "documentation":"

The capacity type of the node. Valid values are OnDemand and Spot. When set to OnDemand, the node is launched as an On-Demand instance. When set to Spot, the node is launched as a Spot instance.

" } }, "documentation":"

Details of an instance (also called a node interchangeably) in a SageMaker HyperPod cluster.

" @@ -9437,6 +9633,11 @@ "box":true, "min":0 }, + "ClusterOnDemandOptions":{ + "type":"structure", + "members":{}, + "documentation":"

Configuration options specific to On-Demand instances.

" + }, "ClusterOrchestrator":{ "type":"structure", "required":["Eks"], @@ -9660,6 +9861,11 @@ "NAME" ] }, + "ClusterSpotOptions":{ + "type":"structure", + "members":{}, + "documentation":"

Configuration options specific to Spot instances.

" + }, "ClusterStatus":{ "type":"string", "enum":[ @@ -12876,6 +13082,10 @@ "shape":"OptimizationJobDeploymentInstanceType", "documentation":"

The type of instance that hosts the optimized model that you create with the optimization job.

" }, + "MaxInstanceCount":{ + "shape":"OptimizationJobMaxInstanceCount", + "documentation":"

The maximum number of instances to use for the optimization job.

" + }, "OptimizationEnvironment":{ "shape":"OptimizationJobEnvironmentVariables", "documentation":"

The environment variables to set in the model container.

" @@ -19018,6 +19228,10 @@ "shape":"OptimizationJobDeploymentInstanceType", "documentation":"

The type of instance that hosts the optimized model that you create with the optimization job.

" }, + "MaxInstanceCount":{ + "shape":"OptimizationJobMaxInstanceCount", + "documentation":"

The maximum number of instances to use for the optimization job.

" + }, "OptimizationConfigs":{ "shape":"OptimizationConfigs", "documentation":"

Settings for each of the optimization techniques that the job applies.

" @@ -25882,6 +26096,10 @@ "shape":"TargetCount", "documentation":"

The desired number of instances for the group after scaling.

" }, + "MinCount":{ + "shape":"InstanceCount", + "documentation":"

Minimum instance count of the instance group.

" + }, "FailureMessage":{ "shape":"String", "documentation":"

An error message describing why the scaling operation failed, if applicable.

" @@ -33238,6 +33456,50 @@ "CreationTime" ] }, + "ModelSpeculativeDecodingConfig":{ + "type":"structure", + "required":["Technique"], + "members":{ + "Technique":{ + "shape":"ModelSpeculativeDecodingTechnique", + "documentation":"

The speculative decoding technique to apply during model optimization.

" + }, + "TrainingDataSource":{ + "shape":"ModelSpeculativeDecodingTrainingDataSource", + "documentation":"

The location of the training data to use for speculative decoding. The data must be formatted as ShareGPT, OpenAI Completions or OpenAI Chat Completions. The input can also be unencrypted captured data from a SageMaker endpoint as long as the endpoint uses one of the above formats.

" + } + }, + "documentation":"

Settings for the model speculative decoding technique that's applied by a model optimization job.

" + }, + "ModelSpeculativeDecodingS3DataType":{ + "type":"string", + "enum":[ + "S3Prefix", + "ManifestFile" + ] + }, + "ModelSpeculativeDecodingTechnique":{ + "type":"string", + "enum":["EAGLE"] + }, + "ModelSpeculativeDecodingTrainingDataSource":{ + "type":"structure", + "required":[ + "S3Uri", + "S3DataType" + ], + "members":{ + "S3Uri":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI that points to the training data for speculative decoding.

" + }, + "S3DataType":{ + "shape":"ModelSpeculativeDecodingS3DataType", + "documentation":"

The type of data stored in the Amazon S3 location. Valid values are S3Prefix or ManifestFile.

" + } + }, + "documentation":"

Contains information about the training data source for speculative decoding.

" + }, "ModelStepMetadata":{ "type":"structure", "members":{ @@ -34787,6 +35049,10 @@ "ModelShardingConfig":{ "shape":"ModelShardingConfig", "documentation":"

Settings for the model sharding technique that's applied by a model optimization job.

" + }, + "ModelSpeculativeDecodingConfig":{ + "shape":"ModelSpeculativeDecodingConfig", + "documentation":"

Settings for the model speculative decoding technique that's applied by a model optimization job.

" } }, "documentation":"

Settings for an optimization technique that you apply with a model optimization job.

", @@ -34816,6 +35082,8 @@ "ml.p4d.24xlarge", "ml.p4de.24xlarge", "ml.p5.48xlarge", + "ml.p5e.48xlarge", + "ml.p5en.48xlarge", "ml.g5.xlarge", "ml.g5.2xlarge", "ml.g5.4xlarge", @@ -34856,12 +35124,21 @@ "max":25, "min":0 }, + "OptimizationJobMaxInstanceCount":{ + "type":"integer", + "box":true, + "min":1 + }, "OptimizationJobModelSource":{ "type":"structure", "members":{ "S3":{ "shape":"OptimizationJobModelSourceS3", "documentation":"

The Amazon S3 location of a source model to optimize with an optimization job.

" + }, + "SageMakerModel":{ + "shape":"OptimizationSageMakerModel", + "documentation":"

The name of an existing SageMaker model to optimize with an optimization job.

" } }, "documentation":"

The location of the source model to optimize with an optimization job.

" @@ -34891,6 +35168,10 @@ "S3OutputLocation":{ "shape":"S3Uri", "documentation":"

The Amazon S3 URI for where to store the optimized model that you create with an optimization job.

" + }, + "SageMakerModel":{ + "shape":"OptimizationSageMakerModel", + "documentation":"

The name of a SageMaker model to use as the output destination for an optimization job.

" } }, "documentation":"

Details for where to store the optimized model that you create with the optimization job.

" @@ -34953,6 +35234,10 @@ "shape":"OptimizationJobDeploymentInstanceType", "documentation":"

The type of instance that hosts the optimized model that you create with the optimization job.

" }, + "MaxInstanceCount":{ + "shape":"OptimizationJobMaxInstanceCount", + "documentation":"

The maximum number of instances to use for the optimization job.

" + }, "OptimizationTypes":{ "shape":"OptimizationTypes", "documentation":"

The optimization techniques that are applied by the optimization job.

" @@ -34983,6 +35268,16 @@ }, "documentation":"

Output values produced by an optimization job.

" }, + "OptimizationSageMakerModel":{ + "type":"structure", + "members":{ + "ModelName":{ + "shape":"ModelName", + "documentation":"

The name of a SageMaker model.

" + } + }, + "documentation":"

A SageMaker model to use as the source or destination for an optimization job.

" + }, "OptimizationType":{"type":"string"}, "OptimizationTypes":{ "type":"list", diff --git a/src/sagemaker_core/main/code_injection/shape_dag.py b/src/sagemaker_core/main/code_injection/shape_dag.py index a683a65..90af96e 100644 --- a/src/sagemaker_core/main/code_injection/shape_dag.py +++ b/src/sagemaker_core/main/code_injection/shape_dag.py @@ -35,6 +35,13 @@ ], "type": "structure", }, + "ActiveOperations": { + "key_shape": "ActiveClusterOperationName", + "key_type": "string", + "type": "map", + "value_shape": "ActiveClusterOperationCount", + "value_type": "integer", + }, "AddAssociationRequest": { "members": [ {"name": "SourceArn", "shape": "AssociationEntityArn", "type": "string"}, @@ -1618,6 +1625,13 @@ ], "type": "structure", }, + "ClusterCapacityRequirements": { + "members": [ + {"name": "Spot", "shape": "ClusterSpotOptions", "type": "structure"}, + {"name": "OnDemand", "shape": "ClusterOnDemandOptions", "type": "structure"}, + ], + "type": "structure", + }, "ClusterEbsVolumeConfig": { "members": [ {"name": "VolumeSizeInGB", "shape": "ClusterEbsVolumeSizeInGB", "type": "integer"}, @@ -1662,6 +1676,7 @@ "members": [ {"name": "CurrentCount", "shape": "ClusterNonNegativeInstanceCount", "type": "integer"}, {"name": "TargetCount", "shape": "ClusterInstanceCount", "type": "integer"}, + {"name": "MinCount", "shape": "ClusterInstanceCount", "type": "integer"}, {"name": "InstanceGroupName", "shape": "ClusterInstanceGroupName", "type": "string"}, {"name": "InstanceType", "shape": "ClusterInstanceType", "type": "string"}, {"name": "LifeCycleConfig", "shape": "ClusterLifeCycleConfig", "type": "structure"}, @@ -1688,6 +1703,17 @@ }, {"name": "CurrentImageId", "shape": "ImageId", "type": "string"}, {"name": "DesiredImageId", "shape": "ImageId", "type": "string"}, + {"name": "ActiveOperations", "shape": "ActiveOperations", "type": "map"}, + { + "name": "KubernetesConfig", + "shape": "ClusterKubernetesConfigDetails", + "type": "structure", + }, + { + "name": "CapacityRequirements", + "shape": "ClusterCapacityRequirements", + "type": "structure", + }, {"name": "TargetStateCount", "shape": "ClusterInstanceCount", "type": "integer"}, {"name": "SoftwareUpdateStatus", "shape": "SoftwareUpdateStatus", "type": "string"}, { @@ -1706,6 +1732,7 @@ "ClusterInstanceGroupSpecification": { "members": [ {"name": "InstanceCount", "shape": "ClusterInstanceCount", "type": "integer"}, + {"name": "MinInstanceCount", "shape": "ClusterInstanceCount", "type": "integer"}, {"name": "InstanceGroupName", "shape": "ClusterInstanceGroupName", "type": "string"}, {"name": "InstanceType", "shape": "ClusterInstanceType", "type": "string"}, {"name": "LifeCycleConfig", "shape": "ClusterLifeCycleConfig", "type": "structure"}, @@ -1725,6 +1752,12 @@ "type": "structure", }, {"name": "ImageId", "shape": "ImageId", "type": "string"}, + {"name": "KubernetesConfig", "shape": "ClusterKubernetesConfig", "type": "structure"}, + { + "name": "CapacityRequirements", + "shape": "ClusterCapacityRequirements", + "type": "structure", + }, ], "type": "structure", }, @@ -1763,6 +1796,51 @@ "member_type": "structure", "type": "list", }, + "ClusterKubernetesConfig": { + "members": [ + {"name": "Labels", "shape": "ClusterKubernetesLabels", "type": "map"}, + {"name": "Taints", "shape": "ClusterKubernetesTaints", "type": "list"}, + ], + "type": "structure", + }, + "ClusterKubernetesConfigDetails": { + "members": [ + {"name": "CurrentLabels", "shape": "ClusterKubernetesLabels", "type": "map"}, + {"name": "DesiredLabels", "shape": "ClusterKubernetesLabels", "type": "map"}, + {"name": "CurrentTaints", "shape": "ClusterKubernetesTaints", "type": "list"}, + {"name": "DesiredTaints", "shape": "ClusterKubernetesTaints", "type": "list"}, + ], + "type": "structure", + }, + "ClusterKubernetesConfigNodeDetails": { + "members": [ + {"name": "CurrentLabels", "shape": "ClusterKubernetesLabels", "type": "map"}, + {"name": "DesiredLabels", "shape": "ClusterKubernetesLabels", "type": "map"}, + {"name": "CurrentTaints", "shape": "ClusterKubernetesTaints", "type": "list"}, + {"name": "DesiredTaints", "shape": "ClusterKubernetesTaints", "type": "list"}, + ], + "type": "structure", + }, + "ClusterKubernetesLabels": { + "key_shape": "ClusterKubernetesLabelKey", + "key_type": "string", + "type": "map", + "value_shape": "ClusterKubernetesLabelValue", + "value_type": "string", + }, + "ClusterKubernetesTaint": { + "members": [ + {"name": "Key", "shape": "ClusterKubernetesTaintKey", "type": "string"}, + {"name": "Value", "shape": "ClusterKubernetesTaintValue", "type": "string"}, + {"name": "Effect", "shape": "ClusterKubernetesTaintEffect", "type": "string"}, + ], + "type": "structure", + }, + "ClusterKubernetesTaints": { + "member_shape": "ClusterKubernetesTaint", + "member_type": "structure", + "type": "list", + }, "ClusterLifeCycleConfig": { "members": [ {"name": "SourceS3Uri", "shape": "S3Uri", "type": "string"}, @@ -1806,6 +1884,12 @@ {"name": "CurrentImageId", "shape": "ImageId", "type": "string"}, {"name": "DesiredImageId", "shape": "ImageId", "type": "string"}, {"name": "UltraServerInfo", "shape": "UltraServerInfo", "type": "structure"}, + { + "name": "KubernetesConfig", + "shape": "ClusterKubernetesConfigNodeDetails", + "type": "structure", + }, + {"name": "CapacityType", "shape": "ClusterCapacityType", "type": "string"}, ], "type": "structure", }, @@ -1838,6 +1922,7 @@ ], "type": "structure", }, + "ClusterOnDemandOptions": {"members": [], "type": "structure"}, "ClusterOrchestrator": { "members": [{"name": "Eks", "shape": "ClusterOrchestratorEksConfig", "type": "structure"}], "type": "structure", @@ -1937,6 +2022,7 @@ "member_type": "structure", "type": "list", }, + "ClusterSpotOptions": {"members": [], "type": "structure"}, "ClusterSummaries": { "member_shape": "ClusterSummary", "member_type": "structure", @@ -3347,6 +3433,11 @@ "shape": "OptimizationJobDeploymentInstanceType", "type": "string", }, + { + "name": "MaxInstanceCount", + "shape": "OptimizationJobMaxInstanceCount", + "type": "integer", + }, { "name": "OptimizationEnvironment", "shape": "OptimizationJobEnvironmentVariables", @@ -6144,6 +6235,11 @@ "shape": "OptimizationJobDeploymentInstanceType", "type": "string", }, + { + "name": "MaxInstanceCount", + "shape": "OptimizationJobMaxInstanceCount", + "type": "integer", + }, {"name": "OptimizationConfigs", "shape": "OptimizationConfigs", "type": "list"}, {"name": "OutputConfig", "shape": "OptimizationJobOutputConfig", "type": "structure"}, {"name": "OptimizationOutput", "shape": "OptimizationOutput", "type": "structure"}, @@ -8886,6 +8982,7 @@ "members": [ {"name": "InstanceCount", "shape": "InstanceCount", "type": "integer"}, {"name": "TargetCount", "shape": "TargetCount", "type": "integer"}, + {"name": "MinCount", "shape": "InstanceCount", "type": "integer"}, {"name": "FailureMessage", "shape": "String", "type": "string"}, ], "type": "structure", @@ -11956,6 +12053,24 @@ ], "type": "structure", }, + "ModelSpeculativeDecodingConfig": { + "members": [ + {"name": "Technique", "shape": "ModelSpeculativeDecodingTechnique", "type": "string"}, + { + "name": "TrainingDataSource", + "shape": "ModelSpeculativeDecodingTrainingDataSource", + "type": "structure", + }, + ], + "type": "structure", + }, + "ModelSpeculativeDecodingTrainingDataSource": { + "members": [ + {"name": "S3Uri", "shape": "S3Uri", "type": "string"}, + {"name": "S3DataType", "shape": "ModelSpeculativeDecodingS3DataType", "type": "string"}, + ], + "type": "structure", + }, "ModelStepMetadata": { "members": [{"name": "Arn", "shape": "String256", "type": "string"}], "type": "structure", @@ -12578,6 +12693,11 @@ "type": "structure", }, {"name": "ModelShardingConfig", "shape": "ModelShardingConfig", "type": "structure"}, + { + "name": "ModelSpeculativeDecodingConfig", + "shape": "ModelSpeculativeDecodingConfig", + "type": "structure", + }, ], "type": "structure", }, @@ -12594,7 +12714,10 @@ "value_type": "string", }, "OptimizationJobModelSource": { - "members": [{"name": "S3", "shape": "OptimizationJobModelSourceS3", "type": "structure"}], + "members": [ + {"name": "S3", "shape": "OptimizationJobModelSourceS3", "type": "structure"}, + {"name": "SageMakerModel", "shape": "OptimizationSageMakerModel", "type": "structure"}, + ], "type": "structure", }, "OptimizationJobModelSourceS3": { @@ -12612,6 +12735,7 @@ "members": [ {"name": "KmsKeyId", "shape": "KmsKeyId", "type": "string"}, {"name": "S3OutputLocation", "shape": "S3Uri", "type": "string"}, + {"name": "SageMakerModel", "shape": "OptimizationSageMakerModel", "type": "structure"}, ], "type": "structure", }, @@ -12634,6 +12758,11 @@ "shape": "OptimizationJobDeploymentInstanceType", "type": "string", }, + { + "name": "MaxInstanceCount", + "shape": "OptimizationJobMaxInstanceCount", + "type": "integer", + }, {"name": "OptimizationTypes", "shape": "OptimizationTypes", "type": "list"}, ], "type": "structure", @@ -12654,6 +12783,10 @@ ], "type": "structure", }, + "OptimizationSageMakerModel": { + "members": [{"name": "ModelName", "shape": "ModelName", "type": "string"}], + "type": "structure", + }, "OptimizationTypes": { "member_shape": "OptimizationType", "member_type": "string", diff --git a/src/sagemaker_core/main/resources.py b/src/sagemaker_core/main/resources.py index 0e4d21c..86852b9 100644 --- a/src/sagemaker_core/main/resources.py +++ b/src/sagemaker_core/main/resources.py @@ -23559,6 +23559,7 @@ class OptimizationJob(Base): optimization_end_time: The time when the optimization job finished processing. failure_reason: If the optimization job status is FAILED, the reason for the failure. optimization_environment: The environment variables to set in the model container. + max_instance_count: The maximum number of instances to use for the optimization job. optimization_output: Output values produced by an optimization job. vpc_config: A VPC in Amazon VPC that your optimized model has access to. @@ -23575,6 +23576,7 @@ class OptimizationJob(Base): model_source: Optional[shapes.OptimizationJobModelSource] = Unassigned() optimization_environment: Optional[Dict[str, str]] = Unassigned() deployment_instance_type: Optional[str] = Unassigned() + max_instance_count: Optional[int] = Unassigned() optimization_configs: Optional[List[shapes.OptimizationConfig]] = Unassigned() output_config: Optional[shapes.OptimizationJobOutputConfig] = Unassigned() optimization_output: Optional[shapes.OptimizationOutput] = Unassigned() @@ -23634,6 +23636,7 @@ def create( optimization_configs: List[shapes.OptimizationConfig], output_config: shapes.OptimizationJobOutputConfig, stopping_condition: shapes.StoppingCondition, + max_instance_count: Optional[int] = Unassigned(), optimization_environment: Optional[Dict[str, str]] = Unassigned(), tags: Optional[List[shapes.Tag]] = Unassigned(), vpc_config: Optional[shapes.OptimizationVpcConfig] = Unassigned(), @@ -23651,6 +23654,7 @@ def create( optimization_configs: Settings for each of the optimization techniques that the job applies. output_config: Details for where to store the optimized model that you create with the optimization job. stopping_condition: + max_instance_count: The maximum number of instances to use for the optimization job. optimization_environment: The environment variables to set in the model container. tags: A list of key-value pairs associated with the optimization job. For more information, see Tagging Amazon Web Services resources in the Amazon Web Services General Reference Guide. vpc_config: A VPC in Amazon VPC that your optimized model has access to. @@ -23687,6 +23691,7 @@ def create( "RoleArn": role_arn, "ModelSource": model_source, "DeploymentInstanceType": deployment_instance_type, + "MaxInstanceCount": max_instance_count, "OptimizationEnvironment": optimization_environment, "OptimizationConfigs": optimization_configs, "OutputConfig": output_config, diff --git a/src/sagemaker_core/main/shapes.py b/src/sagemaker_core/main/shapes.py index d927e09..a8dd764 100644 --- a/src/sagemaker_core/main/shapes.py +++ b/src/sagemaker_core/main/shapes.py @@ -3538,6 +3538,41 @@ class ClusterAutoScalingConfigOutput(Base): failure_message: Optional[str] = Unassigned() +class ClusterSpotOptions(Base): + """ + ClusterSpotOptions + Configuration options specific to Spot instances. + + Attributes + ---------------------- + """ + + +class ClusterOnDemandOptions(Base): + """ + ClusterOnDemandOptions + Configuration options specific to On-Demand instances. + + Attributes + ---------------------- + """ + + +class ClusterCapacityRequirements(Base): + """ + ClusterCapacityRequirements + Defines the instance capacity requirements for an instance group, including configurations for both Spot and On-Demand capacity types. + + Attributes + ---------------------- + spot: Configuration options specific to Spot instances. + on_demand: Configuration options specific to On-Demand instances. + """ + + spot: Optional[ClusterSpotOptions] = Unassigned() + on_demand: Optional[ClusterOnDemandOptions] = Unassigned() + + class ClusterEbsVolumeConfig(Base): """ ClusterEbsVolumeConfig @@ -3604,11 +3639,13 @@ class InstanceGroupScalingMetadata(Base): ---------------------- instance_count: The current number of instances in the group. target_count: The desired number of instances for the group after scaling. + min_count: Minimum instance count of the instance group. failure_message: An error message describing why the scaling operation failed, if applicable. """ instance_count: Optional[int] = Unassigned() target_count: Optional[int] = Unassigned() + min_count: Optional[int] = Unassigned() failure_message: Optional[str] = Unassigned() @@ -3798,6 +3835,42 @@ class ScheduledUpdateConfig(Base): deployment_config: Optional[DeploymentConfiguration] = Unassigned() +class ClusterKubernetesTaint(Base): + """ + ClusterKubernetesTaint + A Kubernetes taint that can be applied to cluster nodes. + + Attributes + ---------------------- + key: The key of the taint. + value: The value of the taint. + effect: The effect of the taint. Valid values are NoSchedule, PreferNoSchedule, and NoExecute. + """ + + key: str + effect: str + value: Optional[str] = Unassigned() + + +class ClusterKubernetesConfigDetails(Base): + """ + ClusterKubernetesConfigDetails + Detailed Kubernetes configuration showing both the current and desired state of labels and taints for cluster nodes. + + Attributes + ---------------------- + current_labels: The current labels applied to cluster nodes of an instance group. + desired_labels: The desired labels to be applied to cluster nodes of an instance group. + current_taints: The current taints applied to cluster nodes of an instance group. + desired_taints: The desired taints to be applied to cluster nodes of an instance group. + """ + + current_labels: Optional[Dict[str, str]] = Unassigned() + desired_labels: Optional[Dict[str, str]] = Unassigned() + current_taints: Optional[List[ClusterKubernetesTaint]] = Unassigned() + desired_taints: Optional[List[ClusterKubernetesTaint]] = Unassigned() + + class ClusterInstanceGroupDetails(Base): """ ClusterInstanceGroupDetails @@ -3807,6 +3880,7 @@ class ClusterInstanceGroupDetails(Base): ---------------------- current_count: The number of instances that are currently in the instance group of a SageMaker HyperPod cluster. target_count: The number of instances you specified to add to the instance group of a SageMaker HyperPod cluster. + min_count: The minimum number of instances that must be available in the instance group of a SageMaker HyperPod cluster before it transitions to InService status. instance_group_name: The name of the instance group of a SageMaker HyperPod cluster. instance_type: The instance type of the instance group of a SageMaker HyperPod cluster. life_cycle_config: Details of LifeCycle configuration for the instance group. @@ -3821,6 +3895,9 @@ class ClusterInstanceGroupDetails(Base): scheduled_update_config: The configuration object of the schedule that SageMaker follows when updating the AMI. current_image_id: The ID of the Amazon Machine Image (AMI) currently in use by the instance group. desired_image_id: The ID of the Amazon Machine Image (AMI) desired for the instance group. + active_operations: A map indicating active operations currently in progress for the instance group of a SageMaker HyperPod cluster. When there is a scaling operation in progress, this map contains a key Scaling with value 1. + kubernetes_config: The Kubernetes configuration for the instance group that contains labels and taints to be applied for the nodes in this instance group. + capacity_requirements: The instance capacity requirements for the instance group. target_state_count: The number of nodes running a specific image ID since the last software update request. software_update_status: Status of the last software udpate request. active_software_update_config @@ -3828,6 +3905,7 @@ class ClusterInstanceGroupDetails(Base): current_count: Optional[int] = Unassigned() target_count: Optional[int] = Unassigned() + min_count: Optional[int] = Unassigned() instance_group_name: Optional[str] = Unassigned() instance_type: Optional[str] = Unassigned() life_cycle_config: Optional[ClusterLifeCycleConfig] = Unassigned() @@ -3842,11 +3920,29 @@ class ClusterInstanceGroupDetails(Base): scheduled_update_config: Optional[ScheduledUpdateConfig] = Unassigned() current_image_id: Optional[str] = Unassigned() desired_image_id: Optional[str] = Unassigned() + active_operations: Optional[Dict[str, int]] = Unassigned() + kubernetes_config: Optional[ClusterKubernetesConfigDetails] = Unassigned() + capacity_requirements: Optional[ClusterCapacityRequirements] = Unassigned() target_state_count: Optional[int] = Unassigned() software_update_status: Optional[str] = Unassigned() active_software_update_config: Optional[DeploymentConfiguration] = Unassigned() +class ClusterKubernetesConfig(Base): + """ + ClusterKubernetesConfig + Kubernetes configuration that specifies labels and taints to be applied to cluster nodes in an instance group. + + Attributes + ---------------------- + labels: Key-value pairs of labels to be applied to cluster nodes. + taints: List of taints to be applied to cluster nodes. + """ + + labels: Optional[Dict[str, str]] = Unassigned() + taints: Optional[List[ClusterKubernetesTaint]] = Unassigned() + + class ClusterInstanceGroupSpecification(Base): """ ClusterInstanceGroupSpecification @@ -3855,6 +3951,7 @@ class ClusterInstanceGroupSpecification(Base): Attributes ---------------------- instance_count: Specifies the number of instances to add to the instance group of a SageMaker HyperPod cluster. + min_instance_count: Defines the minimum number of instances required for an instance group to become InService. If this threshold isn't met within 3 hours, the instance group rolls back to its previous state - zero instances for new instance groups, or previous settings for existing instance groups. MinInstanceCount only affects the initial transition to InService and does not guarantee maintaining this minimum afterward. instance_group_name: Specifies the name of the instance group. instance_type: Specifies the instance type of the instance group. life_cycle_config: Specifies the LifeCycle configuration for the instance group. @@ -3866,6 +3963,8 @@ class ClusterInstanceGroupSpecification(Base): override_vpc_config: To configure multi-AZ deployments, customize the Amazon VPC configuration at the instance group level. You can specify different subnets and security groups across different AZs in the instance group specification to override a SageMaker HyperPod cluster's default Amazon VPC configuration. For more information about deploying a cluster in multiple AZs, see Setting up SageMaker HyperPod clusters across multiple AZs. When your Amazon VPC and subnets support IPv6, network communications differ based on the cluster orchestration platform: Slurm-orchestrated clusters automatically configure nodes with dual IPv6 and IPv4 addresses, allowing immediate IPv6 network communications. In Amazon EKS-orchestrated clusters, nodes receive dual-stack addressing, but pods can only use IPv6 when the Amazon EKS cluster is explicitly IPv6-enabled. For information about deploying an IPv6 Amazon EKS cluster, see Amazon EKS IPv6 Cluster Deployment. Additional resources for IPv6 configuration: For information about adding IPv6 support to your VPC, see to IPv6 Support for VPC. For information about creating a new IPv6-compatible VPC, see Amazon VPC Creation Guide. To configure SageMaker HyperPod with a custom Amazon VPC, see Custom Amazon VPC Setup for SageMaker HyperPod. scheduled_update_config: The configuration object of the schedule that SageMaker uses to update the AMI. image_id: When configuring your HyperPod cluster, you can specify an image ID using one of the following options: HyperPodPublicAmiId: Use a HyperPod public AMI CustomAmiId: Use your custom AMI default: Use the default latest system image If you choose to use a custom AMI (CustomAmiId), ensure it meets the following requirements: Encryption: The custom AMI must be unencrypted. Ownership: The custom AMI must be owned by the same Amazon Web Services account that is creating the HyperPod cluster. Volume support: Only the primary AMI snapshot volume is supported; additional AMI volumes are not supported. When updating the instance group's AMI through the UpdateClusterSoftware operation, if an instance group uses a custom AMI, you must provide an ImageId or use the default as input. Note that if you don't specify an instance group in your UpdateClusterSoftware request, then all of the instance groups are patched with the specified image. + kubernetes_config: Specifies the Kubernetes configuration for the instance group. You describe what you want the labels and taints to look like, and the cluster works to reconcile the actual state with the declared state for nodes in this instance group. + capacity_requirements: Specifies the capacity requirements for the instance group. """ instance_count: int @@ -3873,6 +3972,7 @@ class ClusterInstanceGroupSpecification(Base): instance_type: str life_cycle_config: ClusterLifeCycleConfig execution_role: str + min_instance_count: Optional[int] = Unassigned() threads_per_core: Optional[int] = Unassigned() instance_storage_configs: Optional[List[ClusterInstanceStorageConfig]] = Unassigned() on_start_deep_health_checks: Optional[List[str]] = Unassigned() @@ -3880,6 +3980,8 @@ class ClusterInstanceGroupSpecification(Base): override_vpc_config: Optional[VpcConfig] = Unassigned() scheduled_update_config: Optional[ScheduledUpdateConfig] = Unassigned() image_id: Optional[str] = Unassigned() + kubernetes_config: Optional[ClusterKubernetesConfig] = Unassigned() + capacity_requirements: Optional[ClusterCapacityRequirements] = Unassigned() class ClusterInstancePlacement(Base): @@ -3912,6 +4014,25 @@ class ClusterInstanceStatusDetails(Base): message: Optional[str] = Unassigned() +class ClusterKubernetesConfigNodeDetails(Base): + """ + ClusterKubernetesConfigNodeDetails + Node-specific Kubernetes configuration showing both current and desired state of labels and taints for an individual cluster node. + + Attributes + ---------------------- + current_labels: The current labels applied to the cluster node. + desired_labels: The desired labels to be applied to the cluster node. + current_taints: The current taints applied to the cluster node. + desired_taints: The desired taints to be applied to the cluster node. + """ + + current_labels: Optional[Dict[str, str]] = Unassigned() + desired_labels: Optional[Dict[str, str]] = Unassigned() + current_taints: Optional[List[ClusterKubernetesTaint]] = Unassigned() + desired_taints: Optional[List[ClusterKubernetesTaint]] = Unassigned() + + class UltraServerInfo(Base): """ UltraServerInfo @@ -3950,6 +4071,8 @@ class ClusterNodeDetails(Base): current_image_id: The ID of the Amazon Machine Image (AMI) currently in use by the node. desired_image_id: The ID of the Amazon Machine Image (AMI) desired for the node. ultra_server_info: Contains information about the UltraServer. + kubernetes_config: The Kubernetes configuration applied to this node, showing both the current and desired state of labels and taints. The cluster works to reconcile the actual state with the declared state. + capacity_type: The capacity type of the node. Valid values are OnDemand and Spot. When set to OnDemand, the node is launched as an On-Demand instance. When set to Spot, the node is launched as a Spot instance. """ instance_group_name: Optional[str] = Unassigned() @@ -3970,6 +4093,8 @@ class ClusterNodeDetails(Base): current_image_id: Optional[str] = Unassigned() desired_image_id: Optional[str] = Unassigned() ultra_server_info: Optional[UltraServerInfo] = Unassigned() + kubernetes_config: Optional[ClusterKubernetesConfigNodeDetails] = Unassigned() + capacity_type: Optional[str] = Unassigned() class ClusterNodeSummary(Base): @@ -7767,6 +7892,19 @@ class OptimizationJobModelSourceS3(Base): model_access_config: Optional[OptimizationModelAccessConfig] = Unassigned() +class OptimizationSageMakerModel(Base): + """ + OptimizationSageMakerModel + A SageMaker model to use as the source or destination for an optimization job. + + Attributes + ---------------------- + model_name: The name of a SageMaker model. + """ + + model_name: Optional[Union[str, object]] = Unassigned() + + class OptimizationJobModelSource(Base): """ OptimizationJobModelSource @@ -7775,9 +7913,11 @@ class OptimizationJobModelSource(Base): Attributes ---------------------- s3: The Amazon S3 location of a source model to optimize with an optimization job. + sage_maker_model: The name of an existing SageMaker model to optimize with an optimization job. """ s3: Optional[OptimizationJobModelSourceS3] = Unassigned() + sage_maker_model: Optional[OptimizationSageMakerModel] = Unassigned() class ModelQuantizationConfig(Base): @@ -7825,6 +7965,36 @@ class ModelShardingConfig(Base): override_environment: Optional[Dict[str, str]] = Unassigned() +class ModelSpeculativeDecodingTrainingDataSource(Base): + """ + ModelSpeculativeDecodingTrainingDataSource + Contains information about the training data source for speculative decoding. + + Attributes + ---------------------- + s3_uri: The Amazon S3 URI that points to the training data for speculative decoding. + s3_data_type: The type of data stored in the Amazon S3 location. Valid values are S3Prefix or ManifestFile. + """ + + s3_uri: str + s3_data_type: str + + +class ModelSpeculativeDecodingConfig(Base): + """ + ModelSpeculativeDecodingConfig + Settings for the model speculative decoding technique that's applied by a model optimization job. + + Attributes + ---------------------- + technique: The speculative decoding technique to apply during model optimization. + training_data_source: The location of the training data to use for speculative decoding. The data must be formatted as ShareGPT, OpenAI Completions or OpenAI Chat Completions. The input can also be unencrypted captured data from a SageMaker endpoint as long as the endpoint uses one of the above formats. + """ + + technique: str + training_data_source: Optional[ModelSpeculativeDecodingTrainingDataSource] = Unassigned() + + class OptimizationConfig(Base): """ OptimizationConfig @@ -7835,11 +8005,13 @@ class OptimizationConfig(Base): model_quantization_config: Settings for the model quantization technique that's applied by a model optimization job. model_compilation_config: Settings for the model compilation technique that's applied by a model optimization job. model_sharding_config: Settings for the model sharding technique that's applied by a model optimization job. + model_speculative_decoding_config: Settings for the model speculative decoding technique that's applied by a model optimization job. """ model_quantization_config: Optional[ModelQuantizationConfig] = Unassigned() model_compilation_config: Optional[ModelCompilationConfig] = Unassigned() model_sharding_config: Optional[ModelShardingConfig] = Unassigned() + model_speculative_decoding_config: Optional[ModelSpeculativeDecodingConfig] = Unassigned() class OptimizationJobOutputConfig(Base): @@ -7851,10 +8023,12 @@ class OptimizationJobOutputConfig(Base): ---------------------- kms_key_id: The Amazon Resource Name (ARN) of a key in Amazon Web Services KMS. SageMaker uses they key to encrypt the artifacts of the optimized model when SageMaker uploads the model to Amazon S3. s3_output_location: The Amazon S3 URI for where to store the optimized model that you create with an optimization job. + sage_maker_model: The name of a SageMaker model to use as the output destination for an optimization job. """ s3_output_location: str kms_key_id: Optional[str] = Unassigned() + sage_maker_model: Optional[OptimizationSageMakerModel] = Unassigned() class OptimizationVpcConfig(Base): @@ -11994,6 +12168,7 @@ class OptimizationJobSummary(Base): optimization_end_time: The time when the optimization job finished processing. last_modified_time: The time when the optimization job was last updated. deployment_instance_type: The type of instance that hosts the optimized model that you create with the optimization job. + max_instance_count: The maximum number of instances to use for the optimization job. optimization_types: The optimization techniques that are applied by the optimization job. """ @@ -12006,6 +12181,7 @@ class OptimizationJobSummary(Base): optimization_start_time: Optional[datetime.datetime] = Unassigned() optimization_end_time: Optional[datetime.datetime] = Unassigned() last_modified_time: Optional[datetime.datetime] = Unassigned() + max_instance_count: Optional[int] = Unassigned() class PartnerAppSummary(Base):