Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/@aws-cdk/aws-eks-v2-alpha/lib/managed-nodegroup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ export enum NodegroupAmiType {
* Amazon Linux 2023 with NVIDIA drivers (x86-64)
*/
AL2023_X86_64_NVIDIA = 'AL2023_x86_64_NVIDIA',
/**
* Amazon Linux 2023 with NVIDIA drivers (ARM-64)
*/
AL2023_ARM_64_NVIDIA = 'AL2023_ARM_64_NVIDIA',
/**
* Amazon Linux 2023 (ARM-64)
*/
Expand Down Expand Up @@ -634,6 +638,7 @@ const gpuAmiTypes: NodegroupAmiType[] = [
NodegroupAmiType.AL2_X86_64_GPU,
NodegroupAmiType.AL2023_X86_64_NEURON,
NodegroupAmiType.AL2023_X86_64_NVIDIA,
NodegroupAmiType.AL2023_ARM_64_NVIDIA,
NodegroupAmiType.BOTTLEROCKET_X86_64_NVIDIA,
NodegroupAmiType.BOTTLEROCKET_ARM_64_NVIDIA,
];
Expand Down
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-eks-v2-alpha/test/nodegroup.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ describe('node group', () => {
new ec2.InstanceType('p3.large'),
new ec2.InstanceType('g3.large'),
],
})).toThrow(/The specified AMI does not match the instance types architecture, either specify one of AL2_X86_64_GPU, AL2023_X86_64_NEURON, AL2023_X86_64_NVIDIA, BOTTLEROCKET_X86_64_NVIDIA, BOTTLEROCKET_ARM_64_NVIDIA or don't specify any/);
})).toThrow(/The specified AMI does not match the instance types architecture, either specify one of AL2_X86_64_GPU, AL2023_X86_64_NEURON, AL2023_X86_64_NVIDIA, AL2023_ARM_64_NVIDIA, BOTTLEROCKET_X86_64_NVIDIA, BOTTLEROCKET_ARM_64_NVIDIA or don't specify any/);
});

/**
Expand Down
26 changes: 26 additions & 0 deletions packages/@aws-cdk/aws-sagemaker-alpha/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,32 @@ const endpointConfig = new sagemaker.EndpointConfig(this, 'EndpointConfig', {
});
```

#### Container Startup Health Check Timeout

You can set how long your inference container has to pass its startup health check by configuring
the `containerStartupHealthCheckTimeout` property. This is useful when your model takes a long time
to initialize and you want to avoid premature health check failures:

```typescript
import * as cdk from 'aws-cdk-lib';
import * as sagemaker from '@aws-cdk/aws-sagemaker-alpha';

declare const model: sagemaker.Model;

const endpointConfig = new sagemaker.EndpointConfig(this, 'EndpointConfig', {
instanceProductionVariants: [
{
model: model,
variantName: 'my-variant',
containerStartupHealthCheckTimeout: cdk.Duration.minutes(5), // 5 minutes timeout
},
]
});
```

The timeout must be between 60 and 3600 seconds (1 hour), inclusive; a value outside this range
causes the `EndpointConfig` constructor to throw. If not specified, Amazon SageMaker uses its
default timeout behavior.

### Endpoint

When you create an endpoint from an `EndpointConfig`, Amazon SageMaker launches the ML compute
Expand Down
32 changes: 32 additions & 0 deletions packages/@aws-cdk/aws-sagemaker-alpha/lib/endpoint-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ interface ProductionVariantProps {
* Name of the production variant.
*/
readonly variantName: string;
/**
* The timeout for your inference container to pass its startup health check.
* @default - none
*/
readonly containerStartupHealthCheckTimeout?: cdk.Duration;
}

/**
Expand All @@ -73,6 +78,12 @@ export interface InstanceProductionVariantProps extends ProductionVariantProps {
* @default InstanceType.T2_MEDIUM
*/
readonly instanceType?: InstanceType;
/**
* The timeout for your inference container to pass its startup health check.
* Must be between 60 and 3600 seconds (inclusive).
* @default - none
*/
readonly containerStartupHealthCheckTimeout?: cdk.Duration;
}

/**
Expand Down Expand Up @@ -117,6 +128,13 @@ export interface InstanceProductionVariant extends ProductionVariant {
* Instance type of the production variant.
*/
readonly instanceType: InstanceType;

/**
* The timeout value, in seconds, for your inference container to pass health check.
* Range between 60 and 3600 seconds.
* @default - none
*/
readonly containerStartupHealthCheckTimeoutInSeconds?: number;
}

/**
Expand Down Expand Up @@ -242,13 +260,15 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
throw new Error(`There is already a Production Variant with name '${props.variantName}'`);
}
this.validateInstanceProductionVariantProps(props);
this.validateHealthCheckTimeout(props.containerStartupHealthCheckTimeout);
this.instanceProductionVariantsByName[props.variantName] = {
acceleratorType: props.acceleratorType,
initialInstanceCount: props.initialInstanceCount || 1,
initialVariantWeight: props.initialVariantWeight || 1.0,
instanceType: props.instanceType || InstanceType.T2_MEDIUM,
modelName: props.model.modelName,
variantName: props.variantName,
containerStartupHealthCheckTimeoutInSeconds: props.containerStartupHealthCheckTimeout?.toSeconds(),
};
}

Expand Down Expand Up @@ -322,6 +342,18 @@ export class EndpointConfig extends cdk.Resource implements IEndpointConfig {
instanceType: v.instanceType.toString(),
modelName: v.modelName,
variantName: v.variantName,
containerStartupHealthCheckTimeoutInSeconds: v.containerStartupHealthCheckTimeoutInSeconds,
}) );
}
/**
 * Validate the container startup health check timeout.
 *
 * Nothing is checked when no timeout is given. A timeout backed by an unresolved
 * token is skipped as well: its concrete value is not known at this point, so it
 * cannot be meaningfully compared against the allowed range.
 *
 * @param timeout the timeout configured on the production variant, if any
 * @throws Error when a resolved timeout lies outside of 60 to 3600 seconds (inclusive)
 */
private validateHealthCheckTimeout(timeout?: cdk.Duration) {
  // Guard first: absent or token-backed durations cannot be range-checked.
  if (!timeout || timeout.isUnresolved()) {
    return;
  }

  const timeoutInSeconds = timeout.toSeconds();
  if (timeoutInSeconds < 60 || timeoutInSeconds > 3600) {
    throw new Error('Configure \'containerStartupHealthCheckTimeout\' between 60 and 3600 seconds');
  }
}
}
103 changes: 103 additions & 0 deletions packages/@aws-cdk/aws-sagemaker-alpha/test/endpoint-config.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as path from 'path';
import * as cdk from 'aws-cdk-lib';
import { Template, Match } from 'aws-cdk-lib/assertions';
import * as sagemaker from '../lib';

describe('When synthesizing a stack containing an EndpointConfig', () => {
Expand Down Expand Up @@ -357,3 +358,105 @@ describe('When sharing a model from an origin stack with a destination stack', (
});
});
});

describe('When containerStartupHealthCheckTimeoutInSeconds is set', () => {
  // Message expected for every timeout that falls outside of the accepted range.
  const outOfRangeMessage = 'Configure \'containerStartupHealthCheckTimeout\' between 60 and 3600 seconds';

  /** Creates an empty stack together with a model imported by name. */
  const createFixture = () => {
    const stack = new cdk.Stack();
    const model = sagemaker.Model.fromModelName(stack, 'Model', 'model');
    return { stack, model };
  };

  /** Adds an EndpointConfig holding a single production variant called 'variant'. */
  const addEndpointConfig = (
    fixture: ReturnType<typeof createFixture>,
    id: string,
    timeout?: cdk.Duration,
  ) => {
    // Only set the property when a timeout was requested, so the "not provided" case omits the key entirely.
    const timeoutProps = timeout === undefined ? {} : { containerStartupHealthCheckTimeout: timeout };
    return new sagemaker.EndpointConfig(fixture.stack, id, {
      instanceProductionVariants: [{
        variantName: 'variant',
        model: fixture.model,
        ...timeoutProps,
      }],
    });
  };

  /** Asserts on the rendered production variant, including the expected timeout value or matcher. */
  const expectRenderedTimeout = (stack: cdk.Stack, expectedTimeout: unknown) => {
    Template.fromStack(stack).hasResourceProperties('AWS::SageMaker::EndpointConfig', {
      ProductionVariants: [{
        ModelName: 'model',
        VariantName: 'variant',
        ContainerStartupHealthCheckTimeoutInSeconds: expectedTimeout,
      }],
    });
  };

  test('should be included in CloudFormation template when provided', () => {
    const fixture = createFixture();

    // 5 minutes must be rendered as 300 seconds
    addEndpointConfig(fixture, 'EndpointConfig', cdk.Duration.minutes(5));

    expectRenderedTimeout(fixture.stack, 300);
  });

  test('should not be included when not provided', () => {
    const fixture = createFixture();

    addEndpointConfig(fixture, 'EndpointConfig');

    expectRenderedTimeout(fixture.stack, Match.absent());
  });

  test('should throw error when timeout is less than 60 seconds', () => {
    const fixture = createFixture();

    const construct = () => {
      addEndpointConfig(fixture, 'EndpointConfig', cdk.Duration.seconds(30));
    };

    expect(construct).toThrow(outOfRangeMessage);
  });

  test('should throw error when timeout is greater than 3600 seconds', () => {
    const fixture = createFixture();

    const construct = () => {
      // 2 hours is 7200 seconds
      addEndpointConfig(fixture, 'EndpointConfig', cdk.Duration.hours(2));
    };

    expect(construct).toThrow(outOfRangeMessage);
  });

  test('should accept valid timeout values', () => {
    const fixture = createFixture();

    // Lower bound of the accepted range
    const constructWithMinimum = () => {
      addEndpointConfig(fixture, 'EndpointConfig1', cdk.Duration.seconds(60));
    };
    expect(constructWithMinimum).not.toThrow();

    // Upper bound of the accepted range
    const constructWithMaximum = () => {
      addEndpointConfig(fixture, 'EndpointConfig2', cdk.Duration.seconds(3600));
    };
    expect(constructWithMaximum).not.toThrow();
  });
});
5 changes: 5 additions & 0 deletions packages/aws-cdk-lib/aws-eks/lib/managed-nodegroup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ export enum NodegroupAmiType {
* Amazon Linux 2023 with NVIDIA drivers (x86-64)
*/
AL2023_X86_64_NVIDIA = 'AL2023_x86_64_NVIDIA',
/**
* Amazon Linux 2023 with NVIDIA drivers (ARM-64)
*/
AL2023_ARM_64_NVIDIA = 'AL2023_ARM_64_NVIDIA',
/**
* Amazon Linux 2023 (ARM-64)
*/
Expand Down Expand Up @@ -647,6 +651,7 @@ const gpuAmiTypes: NodegroupAmiType[] = [
NodegroupAmiType.AL2_X86_64_GPU,
NodegroupAmiType.AL2023_X86_64_NEURON,
NodegroupAmiType.AL2023_X86_64_NVIDIA,
NodegroupAmiType.AL2023_ARM_64_NVIDIA,
NodegroupAmiType.BOTTLEROCKET_X86_64_NVIDIA,
NodegroupAmiType.BOTTLEROCKET_ARM_64_NVIDIA,
];
Expand Down
2 changes: 1 addition & 1 deletion packages/aws-cdk-lib/aws-eks/test/nodegroup.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ describe('node group', () => {
new ec2.InstanceType('p3.large'),
new ec2.InstanceType('g3.large'),
],
})).toThrow(/The specified AMI does not match the instance types architecture, either specify one of AL2_X86_64_GPU, AL2023_X86_64_NEURON, AL2023_X86_64_NVIDIA, BOTTLEROCKET_X86_64_NVIDIA, BOTTLEROCKET_ARM_64_NVIDIA or don't specify any/);
})).toThrow(/The specified AMI does not match the instance types architecture, either specify one of AL2_X86_64_GPU, AL2023_X86_64_NEURON, AL2023_X86_64_NVIDIA, AL2023_ARM_64_NVIDIA, BOTTLEROCKET_X86_64_NVIDIA, BOTTLEROCKET_ARM_64_NVIDIA or don't specify any/);
});

/**
Expand Down