Skip to content

Commit

Permalink
feat(client-sagemaker): SageMaker now provides an instantaneous deplo…
Browse files Browse the repository at this point in the history
…yment recommendation through the DescribeModel API
  • Loading branch information
awstools committed May 24, 2023
1 parent 1d7418f commit 4c29c21
Show file tree
Hide file tree
Showing 10 changed files with 320 additions and 123 deletions.
10 changes: 10 additions & 0 deletions clients/client-sagemaker/src/commands/DescribeModelCommand.ts
Expand Up @@ -108,6 +108,16 @@ export interface DescribeModelCommandOutput extends DescribeModelOutput, __Metad
* // CreationTime: new Date("TIMESTAMP"), // required
* // ModelArn: "STRING_VALUE", // required
* // EnableNetworkIsolation: true || false,
* // DeploymentRecommendation: { // DeploymentRecommendation
* // RecommendationStatus: "IN_PROGRESS" || "COMPLETED" || "FAILED" || "NOT_APPLICABLE", // required
* // RealTimeInferenceRecommendations: [ // RealTimeInferenceRecommendations
* // { // RealTimeInferenceRecommendation
* // RecommendationId: "STRING_VALUE", // required
* // InstanceType: "ml.t2.medium" || "ml.t2.large" || "ml.t2.xlarge" || "ml.t2.2xlarge" || "ml.m4.xlarge" || "ml.m4.2xlarge" || "ml.m4.4xlarge" || "ml.m4.10xlarge" || "ml.m4.16xlarge" || "ml.m5.large" || "ml.m5.xlarge" || "ml.m5.2xlarge" || "ml.m5.4xlarge" || "ml.m5.12xlarge" || "ml.m5.24xlarge" || "ml.m5d.large" || "ml.m5d.xlarge" || "ml.m5d.2xlarge" || "ml.m5d.4xlarge" || "ml.m5d.12xlarge" || "ml.m5d.24xlarge" || "ml.c4.large" || "ml.c4.xlarge" || "ml.c4.2xlarge" || "ml.c4.4xlarge" || "ml.c4.8xlarge" || "ml.p2.xlarge" || "ml.p2.8xlarge" || "ml.p2.16xlarge" || "ml.p3.2xlarge" || "ml.p3.8xlarge" || "ml.p3.16xlarge" || "ml.c5.large" || "ml.c5.xlarge" || "ml.c5.2xlarge" || "ml.c5.4xlarge" || "ml.c5.9xlarge" || "ml.c5.18xlarge" || "ml.c5d.large" || "ml.c5d.xlarge" || "ml.c5d.2xlarge" || "ml.c5d.4xlarge" || "ml.c5d.9xlarge" || "ml.c5d.18xlarge" || "ml.g4dn.xlarge" || "ml.g4dn.2xlarge" || "ml.g4dn.4xlarge" || "ml.g4dn.8xlarge" || "ml.g4dn.12xlarge" || "ml.g4dn.16xlarge" || "ml.r5.large" || "ml.r5.xlarge" || "ml.r5.2xlarge" || "ml.r5.4xlarge" || "ml.r5.12xlarge" || "ml.r5.24xlarge" || "ml.r5d.large" || "ml.r5d.xlarge" || "ml.r5d.2xlarge" || "ml.r5d.4xlarge" || "ml.r5d.12xlarge" || "ml.r5d.24xlarge" || "ml.inf1.xlarge" || "ml.inf1.2xlarge" || "ml.inf1.6xlarge" || "ml.inf1.24xlarge" || "ml.c6i.large" || "ml.c6i.xlarge" || "ml.c6i.2xlarge" || "ml.c6i.4xlarge" || "ml.c6i.8xlarge" || "ml.c6i.12xlarge" || "ml.c6i.16xlarge" || "ml.c6i.24xlarge" || "ml.c6i.32xlarge" || "ml.g5.xlarge" || "ml.g5.2xlarge" || "ml.g5.4xlarge" || "ml.g5.8xlarge" || "ml.g5.12xlarge" || "ml.g5.16xlarge" || "ml.g5.24xlarge" || "ml.g5.48xlarge" || "ml.p4d.24xlarge" || "ml.c7g.large" || "ml.c7g.xlarge" || "ml.c7g.2xlarge" || "ml.c7g.4xlarge" || "ml.c7g.8xlarge" || "ml.c7g.12xlarge" || "ml.c7g.16xlarge" || "ml.m6g.large" || "ml.m6g.xlarge" || "ml.m6g.2xlarge" || "ml.m6g.4xlarge" || "ml.m6g.8xlarge" || "ml.m6g.12xlarge" || "ml.m6g.16xlarge" || "ml.m6gd.large" || "ml.m6gd.xlarge" || "ml.m6gd.2xlarge" || 
"ml.m6gd.4xlarge" || "ml.m6gd.8xlarge" || "ml.m6gd.12xlarge" || "ml.m6gd.16xlarge" || "ml.c6g.large" || "ml.c6g.xlarge" || "ml.c6g.2xlarge" || "ml.c6g.4xlarge" || "ml.c6g.8xlarge" || "ml.c6g.12xlarge" || "ml.c6g.16xlarge" || "ml.c6gd.large" || "ml.c6gd.xlarge" || "ml.c6gd.2xlarge" || "ml.c6gd.4xlarge" || "ml.c6gd.8xlarge" || "ml.c6gd.12xlarge" || "ml.c6gd.16xlarge" || "ml.c6gn.large" || "ml.c6gn.xlarge" || "ml.c6gn.2xlarge" || "ml.c6gn.4xlarge" || "ml.c6gn.8xlarge" || "ml.c6gn.12xlarge" || "ml.c6gn.16xlarge" || "ml.r6g.large" || "ml.r6g.xlarge" || "ml.r6g.2xlarge" || "ml.r6g.4xlarge" || "ml.r6g.8xlarge" || "ml.r6g.12xlarge" || "ml.r6g.16xlarge" || "ml.r6gd.large" || "ml.r6gd.xlarge" || "ml.r6gd.2xlarge" || "ml.r6gd.4xlarge" || "ml.r6gd.8xlarge" || "ml.r6gd.12xlarge" || "ml.r6gd.16xlarge" || "ml.p4de.24xlarge" || "ml.trn1.2xlarge" || "ml.trn1.32xlarge" || "ml.inf2.xlarge" || "ml.inf2.8xlarge" || "ml.inf2.24xlarge" || "ml.inf2.48xlarge", // required
* // Environment: "<EnvironmentMap>",
* // },
* // ],
* // },
* // };
*
* ```
Expand Down
10 changes: 10 additions & 0 deletions clients/client-sagemaker/src/commands/SearchCommand.ts
Expand Up @@ -1710,6 +1710,16 @@ export interface SearchCommandOutput extends SearchResponse, __MetadataBearer {}
* // ModelArn: "STRING_VALUE",
* // EnableNetworkIsolation: true || false,
* // Tags: "<TagList>",
* // DeploymentRecommendation: { // DeploymentRecommendation
* // RecommendationStatus: "IN_PROGRESS" || "COMPLETED" || "FAILED" || "NOT_APPLICABLE", // required
* // RealTimeInferenceRecommendations: [ // RealTimeInferenceRecommendations
* // { // RealTimeInferenceRecommendation
* // RecommendationId: "STRING_VALUE", // required
* // InstanceType: "ml.t2.medium" || "ml.t2.large" || "ml.t2.xlarge" || "ml.t2.2xlarge" || "ml.m4.xlarge" || "ml.m4.2xlarge" || "ml.m4.4xlarge" || "ml.m4.10xlarge" || "ml.m4.16xlarge" || "ml.m5.large" || "ml.m5.xlarge" || "ml.m5.2xlarge" || "ml.m5.4xlarge" || "ml.m5.12xlarge" || "ml.m5.24xlarge" || "ml.m5d.large" || "ml.m5d.xlarge" || "ml.m5d.2xlarge" || "ml.m5d.4xlarge" || "ml.m5d.12xlarge" || "ml.m5d.24xlarge" || "ml.c4.large" || "ml.c4.xlarge" || "ml.c4.2xlarge" || "ml.c4.4xlarge" || "ml.c4.8xlarge" || "ml.p2.xlarge" || "ml.p2.8xlarge" || "ml.p2.16xlarge" || "ml.p3.2xlarge" || "ml.p3.8xlarge" || "ml.p3.16xlarge" || "ml.c5.large" || "ml.c5.xlarge" || "ml.c5.2xlarge" || "ml.c5.4xlarge" || "ml.c5.9xlarge" || "ml.c5.18xlarge" || "ml.c5d.large" || "ml.c5d.xlarge" || "ml.c5d.2xlarge" || "ml.c5d.4xlarge" || "ml.c5d.9xlarge" || "ml.c5d.18xlarge" || "ml.g4dn.xlarge" || "ml.g4dn.2xlarge" || "ml.g4dn.4xlarge" || "ml.g4dn.8xlarge" || "ml.g4dn.12xlarge" || "ml.g4dn.16xlarge" || "ml.r5.large" || "ml.r5.xlarge" || "ml.r5.2xlarge" || "ml.r5.4xlarge" || "ml.r5.12xlarge" || "ml.r5.24xlarge" || "ml.r5d.large" || "ml.r5d.xlarge" || "ml.r5d.2xlarge" || "ml.r5d.4xlarge" || "ml.r5d.12xlarge" || "ml.r5d.24xlarge" || "ml.inf1.xlarge" || "ml.inf1.2xlarge" || "ml.inf1.6xlarge" || "ml.inf1.24xlarge" || "ml.c6i.large" || "ml.c6i.xlarge" || "ml.c6i.2xlarge" || "ml.c6i.4xlarge" || "ml.c6i.8xlarge" || "ml.c6i.12xlarge" || "ml.c6i.16xlarge" || "ml.c6i.24xlarge" || "ml.c6i.32xlarge" || "ml.g5.xlarge" || "ml.g5.2xlarge" || "ml.g5.4xlarge" || "ml.g5.8xlarge" || "ml.g5.12xlarge" || "ml.g5.16xlarge" || "ml.g5.24xlarge" || "ml.g5.48xlarge" || "ml.p4d.24xlarge" || "ml.c7g.large" || "ml.c7g.xlarge" || "ml.c7g.2xlarge" || "ml.c7g.4xlarge" || "ml.c7g.8xlarge" || "ml.c7g.12xlarge" || "ml.c7g.16xlarge" || "ml.m6g.large" || "ml.m6g.xlarge" || "ml.m6g.2xlarge" || "ml.m6g.4xlarge" || "ml.m6g.8xlarge" || "ml.m6g.12xlarge" || "ml.m6g.16xlarge" || "ml.m6gd.large" || "ml.m6gd.xlarge" || "ml.m6gd.2xlarge" || 
"ml.m6gd.4xlarge" || "ml.m6gd.8xlarge" || "ml.m6gd.12xlarge" || "ml.m6gd.16xlarge" || "ml.c6g.large" || "ml.c6g.xlarge" || "ml.c6g.2xlarge" || "ml.c6g.4xlarge" || "ml.c6g.8xlarge" || "ml.c6g.12xlarge" || "ml.c6g.16xlarge" || "ml.c6gd.large" || "ml.c6gd.xlarge" || "ml.c6gd.2xlarge" || "ml.c6gd.4xlarge" || "ml.c6gd.8xlarge" || "ml.c6gd.12xlarge" || "ml.c6gd.16xlarge" || "ml.c6gn.large" || "ml.c6gn.xlarge" || "ml.c6gn.2xlarge" || "ml.c6gn.4xlarge" || "ml.c6gn.8xlarge" || "ml.c6gn.12xlarge" || "ml.c6gn.16xlarge" || "ml.r6g.large" || "ml.r6g.xlarge" || "ml.r6g.2xlarge" || "ml.r6g.4xlarge" || "ml.r6g.8xlarge" || "ml.r6g.12xlarge" || "ml.r6g.16xlarge" || "ml.r6gd.large" || "ml.r6gd.xlarge" || "ml.r6gd.2xlarge" || "ml.r6gd.4xlarge" || "ml.r6gd.8xlarge" || "ml.r6gd.12xlarge" || "ml.r6gd.16xlarge" || "ml.p4de.24xlarge" || "ml.trn1.2xlarge" || "ml.trn1.32xlarge" || "ml.inf2.xlarge" || "ml.inf2.8xlarge" || "ml.inf2.24xlarge" || "ml.inf2.48xlarge", // required
* // Environment: "<EnvironmentMap>",
* // },
* // ],
* // },
* // },
* // Endpoints: [ // ModelDashboardEndpoints
* // { // ModelDashboardEndpoint
Expand Down
Expand Up @@ -13,7 +13,7 @@ import {
import { HttpRequest as __HttpRequest, HttpResponse as __HttpResponse } from "@smithy/protocol-http";
import { SerdeContext as __SerdeContext } from "@smithy/types";

import { StopProcessingJobRequest } from "../models/models_3";
import { StopProcessingJobRequest } from "../models/models_4";
import { de_StopProcessingJobCommand, se_StopProcessingJobCommand } from "../protocols/Aws_json1_1";
import { SageMakerClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../SageMakerClient";

Expand Down
Expand Up @@ -13,7 +13,7 @@ import {
import { HttpRequest as __HttpRequest, HttpResponse as __HttpResponse } from "@smithy/protocol-http";
import { SerdeContext as __SerdeContext } from "@smithy/types";

import { StopTrainingJobRequest } from "../models/models_3";
import { StopTrainingJobRequest } from "../models/models_4";
import { de_StopTrainingJobCommand, se_StopTrainingJobCommand } from "../protocols/Aws_json1_1";
import { SageMakerClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../SageMakerClient";

Expand Down
Expand Up @@ -13,7 +13,7 @@ import {
import { HttpRequest as __HttpRequest, HttpResponse as __HttpResponse } from "@smithy/protocol-http";
import { SerdeContext as __SerdeContext } from "@smithy/types";

import { StopTransformJobRequest } from "../models/models_3";
import { StopTransformJobRequest } from "../models/models_4";
import { de_StopTransformJobCommand, se_StopTransformJobCommand } from "../protocols/Aws_json1_1";
import { SageMakerClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../SageMakerClient";

Expand Down
142 changes: 59 additions & 83 deletions clients/client-sagemaker/src/models/models_2.ts
Expand Up @@ -317,6 +317,60 @@ export interface DeployedImage {
ResolutionTime?: Date;
}

/**
* <p>The recommended configuration to use for Real-Time Inference.</p>
* <p>Used as the element type of
* <code>DeploymentRecommendation.RealTimeInferenceRecommendations</code>.</p>
* @public
*/
export interface RealTimeInferenceRecommendation {
/**
* <p>The recommendation ID which uniquely identifies each recommendation.</p>
* <p>Declared as a non-optional property whose type also admits <code>undefined</code>,
* so the key must be present on any object literal of this type.</p>
*/
RecommendationId: string | undefined;

/**
* <p>The recommended instance type for Real-Time Inference.</p>
* <p>Typed as <code>ProductionVariantInstanceType | string</code>, so string values outside
* the known <code>ProductionVariantInstanceType</code> set also type-check.</p>
*/
InstanceType: ProductionVariantInstanceType | string | undefined;

/**
* <p>The recommended environment variables to set in the model container for Real-Time Inference.</p>
* <p>Optional; a map from string keys to string values.</p>
*/
Environment?: Record<string, string>;
}

/**
* <p>Named string constants for the <code>RecommendationStatus</code> field of
* <code>DeploymentRecommendation</code>. Each key maps to an identical string value.</p>
* <p>Per that field's documentation, <code>NOT_APPLICABLE</code> means that SageMaker is unable
* to provide a default recommendation for the model using the information provided.</p>
* @public
* @enum
*/
export const RecommendationStatus = {
COMPLETED: "COMPLETED",
FAILED: "FAILED",
IN_PROGRESS: "IN_PROGRESS",
NOT_APPLICABLE: "NOT_APPLICABLE",
} as const;

/**
* <p>Union of the string literal values held by the <code>RecommendationStatus</code> object
* (<code>"COMPLETED" | "FAILED" | "IN_PROGRESS" | "NOT_APPLICABLE"</code>). It shares its name
* with that object, so a single import provides both the constants and the type.</p>
* @public
*/
export type RecommendationStatus = (typeof RecommendationStatus)[keyof typeof RecommendationStatus];

/**
* <p>A set of recommended deployment configurations for the model.</p>
* <p>Exposed as the optional <code>DeploymentRecommendation</code> member of
* <code>DescribeModelOutput</code>.</p>
* @public
*/
export interface DeploymentRecommendation {
/**
* <p>Status of the deployment recommendation. <code>NOT_APPLICABLE</code> means that SageMaker
* is unable to provide a default recommendation for the model using the information provided.</p>
* <p>Typed as <code>RecommendationStatus | string</code>, so string values that are not one of
* the <code>RecommendationStatus</code> constants also type-check.</p>
*/
RecommendationStatus: RecommendationStatus | string | undefined;

/**
* <p>A list of <a href="https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_RealTimeInferenceRecommendation.html">RealTimeInferenceRecommendation</a> items.</p>
* <p>Optional; the property may be absent.</p>
*/
RealTimeInferenceRecommendations?: RealTimeInferenceRecommendation[];
}

/**
* @public
* @enum
Expand Down Expand Up @@ -4702,6 +4756,11 @@ export interface DescribeModelOutput {
* model container.</p>
*/
EnableNetworkIsolation?: boolean;

/**
* <p>A set of recommended deployment configurations for the model.</p>
*/
DeploymentRecommendation?: DeploymentRecommendation;
}

/**
Expand Down Expand Up @@ -10552,89 +10611,6 @@ export interface InferenceRecommendationsJob {
ModelPackageVersionArn?: string;
}

/**
* <p>The details for a specific benchmark from an Inference Recommender job.</p>
* <p><code>ModelConfiguration</code> is the only member declared without <code>?</code>;
* every other member is optional.</p>
* @public
*/
export interface RecommendationJobInferenceBenchmark {
/**
* <p>The metrics of recommendations.</p>
*/
Metrics?: RecommendationMetrics;

/**
* <p>The endpoint configuration made by Inference Recommender during a recommendation job.</p>
*/
EndpointConfiguration?: EndpointOutputConfiguration;

/**
* <p>Defines the model configuration. Includes the specification name and environment parameters.</p>
* <p>Declared as a non-optional property whose type also admits <code>undefined</code>.</p>
*/
ModelConfiguration: ModelConfiguration | undefined;

/**
* <p>The reason why a benchmark failed.</p>
*/
FailureReason?: string;

/**
* <p>The metrics for an existing endpoint compared in an Inference Recommender job.</p>
*/
EndpointMetrics?: InferenceMetrics;

/**
* <p>A timestamp that shows when the benchmark completed.</p>
*/
InvocationEndTime?: Date;

/**
* <p>A timestamp that shows when the benchmark started.</p>
*/
InvocationStartTime?: Date;
}

/**
* <p>Named string constants for the <code>StepType</code> field of
* <code>InferenceRecommendationsJobStep</code>. <code>BENCHMARK</code> is the only constant
* defined here.</p>
* @public
* @enum
*/
export const RecommendationStepType = {
BENCHMARK: "BENCHMARK",
} as const;

/**
* <p>Union of the string literal values held by the <code>RecommendationStepType</code> object
* (currently just <code>"BENCHMARK"</code>). It shares its name with that object, so a single
* import provides both the constant and the type.</p>
* @public
*/
export type RecommendationStepType = (typeof RecommendationStepType)[keyof typeof RecommendationStepType];

/**
* <p>A returned array object for the <code>Steps</code> response field in the
* <a href="https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ListInferenceRecommendationsJobSteps.html">ListInferenceRecommendationsJobSteps</a> API command.</p>
* <p><code>StepType</code>, <code>JobName</code> and <code>Status</code> are declared as
* non-optional properties; <code>InferenceBenchmark</code> is optional.</p>
* @public
*/
export interface InferenceRecommendationsJobStep {
/**
* <p>The type of the subtask.</p>
* <p>
* <code>BENCHMARK</code>: Evaluate the performance of your model on different instance types.</p>
* <p>Typed as <code>RecommendationStepType | string</code>, so other string values also type-check.</p>
*/
StepType: RecommendationStepType | string | undefined;

/**
* <p>The name of the Inference Recommender job.</p>
*/
JobName: string | undefined;

/**
* <p>The current status of the benchmark.</p>
* <p>Typed as <code>RecommendationJobStatus | string</code>, so other string values also type-check.</p>
*/
Status: RecommendationJobStatus | string | undefined;

/**
* <p>The details for a specific benchmark.</p>
*/
InferenceBenchmark?: RecommendationJobInferenceBenchmark;
}

/**
* @internal
*/
Expand Down

0 comments on commit 4c29c21

Please sign in to comment.