Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions sample/sagemaker/2017-07-24/service-2.json
Original file line number Diff line number Diff line change
Expand Up @@ -7023,6 +7023,10 @@
},
"documentation":"<p>The SageMaker Canvas application settings.</p>"
},
"CapacityReservationPreference":{
"type":"string",
"enum":["capacity-reservations-only"]
},
"CapacitySize":{
"type":"structure",
"required":[
Expand Down Expand Up @@ -19326,6 +19330,33 @@
},
"documentation":"<p>A collection of EBS storage settings that apply to both private and shared spaces.</p>"
},
"Ec2CapacityReservation":{
"type":"structure",
"members":{
"Ec2CapacityReservationId":{
"shape":"Ec2CapacityReservationId",
"documentation":"<p>The unique identifier for an EC2 capacity reservation that's part of the ML capacity reservation.</p>"
},
"TotalInstanceCount":{
"shape":"TaskCount",
"documentation":"<p>The number of instances that you allocated to the EC2 capacity reservation.</p>"
},
"AvailableInstanceCount":{
"shape":"TaskCount",
"documentation":"<p>The number of instances that are currently available in the EC2 capacity reservation.</p>"
},
"UsedByCurrentEndpoint":{
"shape":"TaskCount",
"documentation":"<p>The number of instances from the EC2 capacity reservation that are being used by the endpoint.</p>"
}
},
"documentation":"<p>The EC2 capacity reservations that are shared to an ML capacity reservation.</p>"
},
"Ec2CapacityReservationId":{"type":"string"},
"Ec2CapacityReservationsList":{
"type":"list",
"member":{"shape":"Ec2CapacityReservation"}
},
"Edge":{
"type":"structure",
"members":{
Expand Down Expand Up @@ -29342,6 +29373,12 @@
"min":0,
"pattern":"1|2"
},
"MlReservationArn":{
"type":"string",
"max":258,
"min":20,
"pattern":"arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:ml-reservation/.*"
},
"MlTools":{
"type":"string",
"enum":[
Expand Down Expand Up @@ -34305,6 +34342,10 @@
"InferenceAmiVersion":{
"shape":"ProductionVariantInferenceAmiVersion",
"documentation":"<p>Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.</p> <p>By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions.</p> <p>The AMI version names, and their configurations, are the following:</p> <dl> <dt>al2-ami-sagemaker-inference-gpu-2</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-2-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 535</p> </li> <li> <p>CUDA version: 12.2</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-gpu-3-1</dt> <dd> <ul> <li> <p>Accelerator: GPU</p> </li> <li> <p>NVIDIA driver version: 550</p> </li> <li> <p>CUDA version: 12.4</p> </li> <li> <p>NVIDIA Container Toolkit with disabled CUDA-compat mounting</p> </li> </ul> </dd> <dt>al2-ami-sagemaker-inference-neuron-2</dt> <dd> <ul> <li> <p>Accelerator: Inferentia2 and Trainium</p> </li> <li> <p>Neuron driver version: 2.19</p> </li> </ul> </dd> </dl>"
},
"CapacityReservationConfig":{
"shape":"ProductionVariantCapacityReservationConfig",
"documentation":"<p>Settings for the capacity reservation for the compute instances that SageMaker AI reserves for an endpoint. </p>"
}
},
"documentation":"<p> Identifies a model that you want to host and the resources chosen to deploy for hosting it. If you are deploying multiple models, tell SageMaker how to distribute traffic among the models by specifying variant weights. For more information on production variants, check <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/model-ab-testing.html\"> Production variants</a>. </p>"
Expand All @@ -34320,6 +34361,50 @@
"ml.eia2.xlarge"
]
},
"ProductionVariantCapacityReservationConfig":{
"type":"structure",
"members":{
"CapacityReservationPreference":{
"shape":"CapacityReservationPreference",
"documentation":"<p>Options that you can choose for the capacity reservation. SageMaker AI supports the following options:</p> <dl> <dt>capacity-reservations-only</dt> <dd> <p>SageMaker AI launches instances only into an ML capacity reservation. If no capacity is available, the instances fail to launch.</p> </dd> </dl>"
},
"MlReservationArn":{
"shape":"MlReservationArn",
"documentation":"<p>The Amazon Resource Name (ARN) that uniquely identifies the ML capacity reservation that SageMaker AI applies when it deploys the endpoint.</p>"
}
},
"documentation":"<p>Settings for the capacity reservation for the compute instances that SageMaker AI reserves for an endpoint. </p>"
},
"ProductionVariantCapacityReservationSummary":{
"type":"structure",
"members":{
"MlReservationArn":{
"shape":"MlReservationArn",
"documentation":"<p>The Amazon Resource Name (ARN) that uniquely identifies the ML capacity reservation that SageMaker AI applies when it deploys the endpoint.</p>"
},
"CapacityReservationPreference":{
"shape":"CapacityReservationPreference",
"documentation":"<p>The option that you chose for the capacity reservation. SageMaker AI supports the following options:</p> <dl> <dt>capacity-reservations-only</dt> <dd> <p>SageMaker AI launches instances only into an ML capacity reservation. If no capacity is available, the instances fail to launch.</p> </dd> </dl>"
},
"TotalInstanceCount":{
"shape":"TaskCount",
"documentation":"<p>The number of instances that you allocated to the ML capacity reservation.</p>"
},
"AvailableInstanceCount":{
"shape":"TaskCount",
"documentation":"<p>The number of instances that are currently available in the ML capacity reservation.</p>"
},
"UsedByCurrentEndpoint":{
"shape":"TaskCount",
"documentation":"<p>The number of instances from the ML capacity reservation that are being used by the endpoint.</p>"
},
"Ec2CapacityReservations":{
"shape":"Ec2CapacityReservationsList",
"documentation":"<p>The EC2 capacity reservations that are shared to this ML capacity reservation, if any.</p>"
}
},
"documentation":"<p>Details about an ML capacity reservation.</p>"
},
"ProductionVariantContainerStartupHealthCheckTimeoutInSeconds":{
"type":"integer",
"box":true,
Expand Down Expand Up @@ -34732,6 +34817,10 @@
"RoutingConfig":{
"shape":"ProductionVariantRoutingConfig",
"documentation":"<p>Settings that control how the endpoint routes incoming traffic to the instances that the endpoint hosts.</p>"
},
"CapacityReservationConfig":{
"shape":"ProductionVariantCapacityReservationSummary",
"documentation":"<p>Settings for the capacity reservation for the compute instances that SageMaker AI reserves for an endpoint. </p>"
}
},
"documentation":"<p>Describes weight and capacities for a production variant associated with an endpoint. If you sent a request to the <code>UpdateEndpointWeightsAndCapacities</code> API and the endpoint status is <code>Updating</code>, you get different desired and current values. </p>"
Expand Down Expand Up @@ -41161,6 +41250,10 @@
"ProjectS3Path":{
"shape":"S3Uri",
"documentation":"<p>The location where Amazon S3 stores temporary execution data and other artifacts for the project that corresponds to the domain.</p>"
},
"SingleSignOnApplicationArn":{
"shape":"SingleSignOnApplicationArn",
"documentation":"<p>The ARN of the application managed by SageMaker AI and SageMaker Unified Studio in the Amazon Web Services IAM Identity Center.</p>"
}
},
"documentation":"<p>The settings that apply to an Amazon SageMaker AI domain when you use it in Amazon SageMaker Unified Studio.</p>"
Expand Down
63 changes: 63 additions & 0 deletions src/sagemaker_core/main/code_injection/shape_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -6296,6 +6296,24 @@
],
"type": "structure",
},
"Ec2CapacityReservation": {
"members": [
{
"name": "Ec2CapacityReservationId",
"shape": "Ec2CapacityReservationId",
"type": "string",
},
{"name": "TotalInstanceCount", "shape": "TaskCount", "type": "integer"},
{"name": "AvailableInstanceCount", "shape": "TaskCount", "type": "integer"},
{"name": "UsedByCurrentEndpoint", "shape": "TaskCount", "type": "integer"},
],
"type": "structure",
},
"Ec2CapacityReservationsList": {
"member_shape": "Ec2CapacityReservation",
"member_type": "structure",
"type": "list",
},
"Edge": {
"members": [
{"name": "SourceArn", "shape": "AssociationEntityArn", "type": "string"},
Expand Down Expand Up @@ -12393,6 +12411,41 @@
"shape": "ProductionVariantInferenceAmiVersion",
"type": "string",
},
{
"name": "CapacityReservationConfig",
"shape": "ProductionVariantCapacityReservationConfig",
"type": "structure",
},
],
"type": "structure",
},
"ProductionVariantCapacityReservationConfig": {
"members": [
{
"name": "CapacityReservationPreference",
"shape": "CapacityReservationPreference",
"type": "string",
},
{"name": "MlReservationArn", "shape": "MlReservationArn", "type": "string"},
],
"type": "structure",
},
"ProductionVariantCapacityReservationSummary": {
"members": [
{"name": "MlReservationArn", "shape": "MlReservationArn", "type": "string"},
{
"name": "CapacityReservationPreference",
"shape": "CapacityReservationPreference",
"type": "string",
},
{"name": "TotalInstanceCount", "shape": "TaskCount", "type": "integer"},
{"name": "AvailableInstanceCount", "shape": "TaskCount", "type": "integer"},
{"name": "UsedByCurrentEndpoint", "shape": "TaskCount", "type": "integer"},
{
"name": "Ec2CapacityReservations",
"shape": "Ec2CapacityReservationsList",
"type": "list",
},
],
"type": "structure",
},
Expand Down Expand Up @@ -12493,6 +12546,11 @@
"shape": "ProductionVariantRoutingConfig",
"type": "structure",
},
{
"name": "CapacityReservationConfig",
"shape": "ProductionVariantCapacityReservationSummary",
"type": "structure",
},
],
"type": "structure",
},
Expand Down Expand Up @@ -14833,6 +14891,11 @@
{"name": "ProjectId", "shape": "UnifiedStudioProjectId", "type": "string"},
{"name": "EnvironmentId", "shape": "UnifiedStudioEnvironmentId", "type": "string"},
{"name": "ProjectS3Path", "shape": "S3Uri", "type": "string"},
{
"name": "SingleSignOnApplicationArn",
"shape": "SingleSignOnApplicationArn",
"type": "string",
},
],
"type": "structure",
},
Expand Down
65 changes: 65 additions & 0 deletions src/sagemaker_core/main/shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4779,6 +4779,7 @@ class UnifiedStudioSettings(Base):
project_id: The ID of the Amazon SageMaker Unified Studio project that corresponds to the domain.
environment_id: The ID of the environment that Amazon SageMaker Unified Studio associates with the domain.
project_s3_path: The location where Amazon S3 stores temporary execution data and other artifacts for the project that corresponds to the domain.
single_sign_on_application_arn: The ARN of the application managed by SageMaker AI and SageMaker Unified Studio in the Amazon Web Services IAM Identity Center.
"""

studio_web_portal_access: Optional[str] = Unassigned()
Expand All @@ -4788,6 +4789,7 @@ class UnifiedStudioSettings(Base):
project_id: Optional[str] = Unassigned()
environment_id: Optional[str] = Unassigned()
project_s3_path: Optional[str] = Unassigned()
single_sign_on_application_arn: Optional[str] = Unassigned()


class DomainSettings(Base):
Expand Down Expand Up @@ -4966,6 +4968,21 @@ class ProductionVariantRoutingConfig(Base):
routing_strategy: str


class ProductionVariantCapacityReservationConfig(Base):
    """
    ProductionVariantCapacityReservationConfig
        Capacity reservation settings for the compute instances that SageMaker AI
        reserves for an endpoint.

    Attributes
    ----------------------
    capacity_reservation_preference: The capacity reservation option to apply. The
        option listed by the service model is ``capacity-reservations-only``:
        SageMaker AI launches instances only into an ML capacity reservation, and
        the instances fail to launch if no capacity is available.
    ml_reservation_arn: The Amazon Resource Name (ARN) that uniquely identifies the
        ML capacity reservation that SageMaker AI applies when it deploys the
        endpoint.
    """

    capacity_reservation_preference: Optional[str] = Unassigned()
    ml_reservation_arn: Optional[str] = Unassigned()


class ProductionVariant(Base):
"""
ProductionVariant
Expand All @@ -4988,6 +5005,7 @@ class ProductionVariant(Base):
managed_instance_scaling: Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
routing_config: Settings that control how the endpoint routes incoming traffic to the instances that the endpoint hosts.
inference_ami_version: Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads. By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions. The AMI version names, and their configurations, are the following: al2-ami-sagemaker-inference-gpu-2 Accelerator: GPU NVIDIA driver version: 535 CUDA version: 12.2 al2-ami-sagemaker-inference-gpu-2-1 Accelerator: GPU NVIDIA driver version: 535 CUDA version: 12.2 NVIDIA Container Toolkit with disabled CUDA-compat mounting al2-ami-sagemaker-inference-gpu-3-1 Accelerator: GPU NVIDIA driver version: 550 CUDA version: 12.4 NVIDIA Container Toolkit with disabled CUDA-compat mounting al2-ami-sagemaker-inference-neuron-2 Accelerator: Inferentia2 and Trainium Neuron driver version: 2.19
capacity_reservation_config: Settings for the capacity reservation for the compute instances that SageMaker AI reserves for an endpoint.
"""

variant_name: str
Expand All @@ -5005,6 +5023,7 @@ class ProductionVariant(Base):
managed_instance_scaling: Optional[ProductionVariantManagedInstanceScaling] = Unassigned()
routing_config: Optional[ProductionVariantRoutingConfig] = Unassigned()
inference_ami_version: Optional[str] = Unassigned()
capacity_reservation_config: Optional[ProductionVariantCapacityReservationConfig] = Unassigned()


class DataCaptureConfig(Base):
Expand Down Expand Up @@ -8164,6 +8183,48 @@ class ProductionVariantStatus(Base):
start_time: Optional[datetime.datetime] = Unassigned()


class Ec2CapacityReservation(Base):
    """
    Ec2CapacityReservation
        An EC2 capacity reservation that is shared to an ML capacity reservation.

    Attributes
    ----------------------
    ec2_capacity_reservation_id: The unique identifier of the EC2 capacity
        reservation that is part of the ML capacity reservation.
    total_instance_count: The number of instances that you allocated to the EC2
        capacity reservation.
    available_instance_count: The number of instances currently available in the
        EC2 capacity reservation.
    used_by_current_endpoint: The number of instances from the EC2 capacity
        reservation that the endpoint is using.
    """

    ec2_capacity_reservation_id: Optional[str] = Unassigned()
    total_instance_count: Optional[int] = Unassigned()
    available_instance_count: Optional[int] = Unassigned()
    used_by_current_endpoint: Optional[int] = Unassigned()


class ProductionVariantCapacityReservationSummary(Base):
    """
    ProductionVariantCapacityReservationSummary
        Details about an ML capacity reservation.

    Attributes
    ----------------------
    ml_reservation_arn: The Amazon Resource Name (ARN) that uniquely identifies the
        ML capacity reservation that SageMaker AI applies when it deploys the
        endpoint.
    capacity_reservation_preference: The capacity reservation option that was
        chosen. The option listed by the service model is
        ``capacity-reservations-only``: SageMaker AI launches instances only into
        an ML capacity reservation, and the instances fail to launch if no
        capacity is available.
    total_instance_count: The number of instances that you allocated to the ML
        capacity reservation.
    available_instance_count: The number of instances currently available in the
        ML capacity reservation.
    used_by_current_endpoint: The number of instances from the ML capacity
        reservation that the endpoint is using.
    ec2_capacity_reservations: The EC2 capacity reservations that are shared to
        this ML capacity reservation, if any.
    """

    ml_reservation_arn: Optional[str] = Unassigned()
    capacity_reservation_preference: Optional[str] = Unassigned()
    total_instance_count: Optional[int] = Unassigned()
    available_instance_count: Optional[int] = Unassigned()
    used_by_current_endpoint: Optional[int] = Unassigned()
    ec2_capacity_reservations: Optional[List[Ec2CapacityReservation]] = Unassigned()


class ProductionVariantSummary(Base):
"""
ProductionVariantSummary
Expand All @@ -8182,6 +8243,7 @@ class ProductionVariantSummary(Base):
desired_serverless_config: The serverless configuration requested for the endpoint update.
managed_instance_scaling: Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
routing_config: Settings that control how the endpoint routes incoming traffic to the instances that the endpoint hosts.
capacity_reservation_config: Settings for the capacity reservation for the compute instances that SageMaker AI reserves for an endpoint.
"""

variant_name: str
Expand All @@ -8195,6 +8257,9 @@ class ProductionVariantSummary(Base):
desired_serverless_config: Optional[ProductionVariantServerlessConfig] = Unassigned()
managed_instance_scaling: Optional[ProductionVariantManagedInstanceScaling] = Unassigned()
routing_config: Optional[ProductionVariantRoutingConfig] = Unassigned()
capacity_reservation_config: Optional[ProductionVariantCapacityReservationSummary] = (
Unassigned()
)


class PendingProductionVariantSummary(Base):
Expand Down
Loading