Skip to content

Commit

Permalink
feat(client-sagemaker): SageMaker Inference Recommender now decouples…
Browse files Browse the repository at this point in the history
… from Model Registry and can accept a Model Name to invoke an inference recommendations job; Inference Recommender now provides CPU/Memory Utilization metrics data in recommendation output.
  • Loading branch information
awstools committed Jan 25, 2023
1 parent f699098 commit bf7623f
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 10 deletions.
14 changes: 13 additions & 1 deletion clients/client-sagemaker/src/models/models_1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2618,6 +2618,13 @@ export interface RecommendationJobContainerConfig {
* <p>A list of the instance types that are used to generate inferences in real-time.</p>
*/
SupportedInstanceTypes?: string[];

/**
* <p>Specifies the name and shape of the expected data inputs for your trained model with a JSON dictionary form.
* This field is used for optimizing your model using SageMaker Neo. For more information, see
* <a href="https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_InputConfig.html#sagemaker-Type-InputConfig-DataInputConfig">DataInputConfig</a>.</p>
*/
DataInputConfig?: string;
}

/**
Expand Down Expand Up @@ -2738,7 +2745,7 @@ export interface RecommendationJobInputConfig {
/**
* <p>The Amazon Resource Name (ARN) of a versioned model package.</p>
*/
ModelPackageVersionArn: string | undefined;
ModelPackageVersionArn?: string;

/**
* <p>Specifies the maximum duration of the job, in seconds.</p>
Expand Down Expand Up @@ -2816,6 +2823,11 @@ export interface RecommendationJobInputConfig {
* <p>Inference Recommender provisions SageMaker endpoints with access to VPC in the inference recommendation job.</p>
*/
VpcConfig?: RecommendationJobVpcConfig;

/**
* <p>The name of the created model.</p>
*/
ModelName?: string;
}

export enum RecommendationJobType {
Expand Down
24 changes: 24 additions & 0 deletions clients/client-sagemaker/src/models/models_2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3337,6 +3337,20 @@ export interface RecommendationMetrics {
* <p>The expected model latency at maximum invocations per minute for the instance.</p>
*/
ModelLatency: number | undefined;

/**
* <p>The expected CPU utilization at maximum invocations per minute for the instance.</p>
* <p>
* <code>NaN</code> indicates that the value is not available.</p>
*/
CpuUtilization?: number;

/**
* <p>The expected memory utilization at maximum invocations per minute for the instance.</p>
* <p>
* <code>NaN</code> indicates that the value is not available.</p>
*/
MemoryUtilization?: number;
}

/**
Expand Down Expand Up @@ -3372,6 +3386,11 @@ export interface ModelConfiguration {
* <p>Defines the environment parameters that include key, value types, and values.</p>
*/
EnvironmentParameters?: EnvironmentParameter[];

/**
* <p>The name of the compilation job used to create the recommended model artifacts.</p>
*/
CompilationJobName?: string;
}

/**
Expand All @@ -3392,6 +3411,11 @@ export interface InferenceRecommendation {
* <p>Defines the model configuration.</p>
*/
ModelConfiguration: ModelConfiguration | undefined;

/**
* <p>The recommendation ID which uniquely identifies each recommendation.</p>
*/
RecommendationId?: string;
}

export enum RecommendationJobStatus {
Expand Down
28 changes: 21 additions & 7 deletions clients/client-sagemaker/src/protocols/Aws_json1_1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13687,13 +13687,19 @@ const deserializeAws_json1_1ListInferenceRecommendationsJobStepsCommandError = a
body: await parseErrorBody(output.body, context),
};
const errorCode = loadRestJsonErrorCode(output, parsedOutput.body);
const parsedBody = parsedOutput.body;
throwDefaultError({
output,
parsedBody,
exceptionCtor: __BaseException,
errorCode,
});
switch (errorCode) {
case "ResourceNotFound":
case "com.amazonaws.sagemaker#ResourceNotFound":
throw await deserializeAws_json1_1ResourceNotFoundResponse(parsedOutput, context);
default:
const parsedBody = parsedOutput.body;
throwDefaultError({
output,
parsedBody,
exceptionCtor: __BaseException,
errorCode,
});
}
};

export const deserializeAws_json1_1ListLabelingJobsCommand = async (
Expand Down Expand Up @@ -24427,6 +24433,7 @@ const serializeAws_json1_1RecommendationJobContainerConfig = (
context: __SerdeContext
): any => {
return {
...(input.DataInputConfig != null && { DataInputConfig: input.DataInputConfig }),
...(input.Domain != null && { Domain: input.Domain }),
...(input.Framework != null && { Framework: input.Framework }),
...(input.FrameworkVersion != null && { FrameworkVersion: input.FrameworkVersion }),
Expand Down Expand Up @@ -24457,6 +24464,7 @@ const serializeAws_json1_1RecommendationJobInputConfig = (
}),
...(input.Endpoints != null && { Endpoints: serializeAws_json1_1Endpoints(input.Endpoints, context) }),
...(input.JobDurationInSeconds != null && { JobDurationInSeconds: input.JobDurationInSeconds }),
...(input.ModelName != null && { ModelName: input.ModelName }),
...(input.ModelPackageVersionArn != null && { ModelPackageVersionArn: input.ModelPackageVersionArn }),
...(input.ResourceLimit != null && {
ResourceLimit: serializeAws_json1_1RecommendationJobResourceLimit(input.ResourceLimit, context),
Expand Down Expand Up @@ -32466,6 +32474,7 @@ const deserializeAws_json1_1InferenceRecommendation = (
output.ModelConfiguration != null
? deserializeAws_json1_1ModelConfiguration(output.ModelConfiguration, context)
: undefined,
RecommendationId: __expectString(output.RecommendationId),
} as any;
};

Expand Down Expand Up @@ -34196,6 +34205,7 @@ const deserializeAws_json1_1ModelClientConfig = (output: any, context: __SerdeCo

const deserializeAws_json1_1ModelConfiguration = (output: any, context: __SerdeContext): ModelConfiguration => {
return {
CompilationJobName: __expectString(output.CompilationJobName),
EnvironmentParameters:
output.EnvironmentParameters != null
? deserializeAws_json1_1EnvironmentParameters(output.EnvironmentParameters, context)
Expand Down Expand Up @@ -36703,6 +36713,7 @@ const deserializeAws_json1_1RecommendationJobContainerConfig = (
context: __SerdeContext
): RecommendationJobContainerConfig => {
return {
DataInputConfig: __expectString(output.DataInputConfig),
Domain: __expectString(output.Domain),
Framework: __expectString(output.Framework),
FrameworkVersion: __expectString(output.FrameworkVersion),
Expand Down Expand Up @@ -36752,6 +36763,7 @@ const deserializeAws_json1_1RecommendationJobInputConfig = (
: undefined,
Endpoints: output.Endpoints != null ? deserializeAws_json1_1Endpoints(output.Endpoints, context) : undefined,
JobDurationInSeconds: __expectInt32(output.JobDurationInSeconds),
ModelName: __expectString(output.ModelName),
ModelPackageVersionArn: __expectString(output.ModelPackageVersionArn),
ResourceLimit:
output.ResourceLimit != null
Expand Down Expand Up @@ -36875,7 +36887,9 @@ const deserializeAws_json1_1RecommendationMetrics = (output: any, context: __Ser
return {
CostPerHour: __limitedParseFloat32(output.CostPerHour),
CostPerInference: __limitedParseFloat32(output.CostPerInference),
CpuUtilization: __limitedParseFloat32(output.CpuUtilization),
MaxInvocations: __expectInt32(output.MaxInvocations),
MemoryUtilization: __limitedParseFloat32(output.MemoryUtilization),
ModelLatency: __expectInt32(output.ModelLatency),
} as any;
};
Expand Down
72 changes: 70 additions & 2 deletions codegen/sdk-codegen/aws-models/sagemaker.json
Original file line number Diff line number Diff line change
Expand Up @@ -25771,6 +25771,12 @@
"smithy.api#documentation": "<p>Defines the model configuration.</p>",
"smithy.api#required": {}
}
},
"RecommendationId": {
"target": "com.amazonaws.sagemaker#String",
"traits": {
"smithy.api#documentation": "<p>The recommendation ID which uniquely identifies each recommendation.</p>"
}
}
},
"traits": {
Expand Down Expand Up @@ -30548,6 +30554,11 @@
"output": {
"target": "com.amazonaws.sagemaker#ListInferenceRecommendationsJobStepsResponse"
},
"errors": [
{
"target": "com.amazonaws.sagemaker#ResourceNotFound"
}
],
"traits": {
"smithy.api#documentation": "<p>Returns a list of the subtasks for an Inference Recommender job.</p>\n <p>The supported subtasks are benchmarks, which evaluate the performance of your model on different instance types.</p>",
"smithy.api#paginated": {
Expand Down Expand Up @@ -35334,6 +35345,12 @@
"traits": {
"smithy.api#documentation": "<p>Defines the environment parameters that includes key, value types, and values.</p>"
}
},
"CompilationJobName": {
"target": "com.amazonaws.sagemaker#RecommendationJobCompilationJobName",
"traits": {
"smithy.api#documentation": "<p>The name of the compilation job used to create the recommended model artifacts.</p>"
}
}
},
"traits": {
Expand Down Expand Up @@ -43631,6 +43648,16 @@
"smithy.api#pattern": "^arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:inference-recommendations-job/"
}
},
"com.amazonaws.sagemaker#RecommendationJobCompilationJobName": {
"type": "string",
"traits": {
"smithy.api#length": {
"min": 1,
"max": 63
},
"smithy.api#pattern": "^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$"
}
},
"com.amazonaws.sagemaker#RecommendationJobCompiledOutputConfig": {
"type": "structure",
"members": {
Expand Down Expand Up @@ -43689,12 +43716,28 @@
"traits": {
"smithy.api#documentation": "<p>A list of the instance types that are used to generate inferences in real-time.</p>"
}
},
"DataInputConfig": {
"target": "com.amazonaws.sagemaker#RecommendationJobDataInputConfig",
"traits": {
"smithy.api#documentation": "<p>Specifies the name and shape of the expected data inputs for your trained model with a JSON dictionary form.\n This field is used for optimizing your model using SageMaker Neo. For more information, see\n <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_InputConfig.html#sagemaker-Type-InputConfig-DataInputConfig\">DataInputConfig</a>.</p>"
}
}
},
"traits": {
"smithy.api#documentation": "<p>Specifies mandatory fields for running an Inference Recommender job directly in the\n <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateInferenceRecommendationsJob.html\">CreateInferenceRecommendationsJob</a>\n API. The fields specified in <code>ContainerConfig</code> override the corresponding fields in the model package. Use\n <code>ContainerConfig</code> if you want to specify these fields for the recommendation job but don't want to edit them in your model package.</p>"
}
},
"com.amazonaws.sagemaker#RecommendationJobDataInputConfig": {
"type": "string",
"traits": {
"smithy.api#length": {
"min": 1,
"max": 1024
},
"smithy.api#pattern": "^[\\S\\s]+$"
}
},
"com.amazonaws.sagemaker#RecommendationJobDescription": {
"type": "string",
"traits": {
Expand Down Expand Up @@ -43736,8 +43779,7 @@
"ModelPackageVersionArn": {
"target": "com.amazonaws.sagemaker#ModelPackageArn",
"traits": {
"smithy.api#documentation": "<p>The Amazon Resource Name (ARN) of a versioned model package.</p>",
"smithy.api#required": {}
"smithy.api#documentation": "<p>The Amazon Resource Name (ARN) of a versioned model package.</p>"
}
},
"JobDurationInSeconds": {
Expand Down Expand Up @@ -43787,6 +43829,12 @@
"traits": {
"smithy.api#documentation": "<p>Inference Recommender provisions SageMaker endpoints with access to VPC in the inference recommendation job.</p>"
}
},
"ModelName": {
"target": "com.amazonaws.sagemaker#ModelName",
"traits": {
"smithy.api#documentation": "<p>The name of the created model.</p>"
}
}
},
"traits": {
Expand Down Expand Up @@ -44054,6 +44102,18 @@
"smithy.api#documentation": "<p>The expected model latency at maximum invocation per minute for the instance.</p>",
"smithy.api#required": {}
}
},
"CpuUtilization": {
"target": "com.amazonaws.sagemaker#UtilizationMetric",
"traits": {
"smithy.api#documentation": "<p>The expected CPU utilization at maximum invocations per minute for the instance.</p>\n <p>\n <code>NaN</code> indicates that the value is not available.</p>"
}
},
"MemoryUtilization": {
"target": "com.amazonaws.sagemaker#UtilizationMetric",
"traits": {
"smithy.api#documentation": "<p>The expected memory utilization at maximum invocations per minute for the instance.</p>\n <p>\n <code>NaN</code> indicates that the value is not available.</p>"
}
}
},
"traits": {
Expand Down Expand Up @@ -55918,6 +55978,14 @@
"smithy.api#documentation": "<p>A collection of settings that apply to users of Amazon SageMaker Studio. These settings are\n specified when the <code>CreateUserProfile</code> API is called, and as <code>DefaultUserSettings</code>\n when the <code>CreateDomain</code> API is called.</p>\n <p>\n <code>SecurityGroups</code> is aggregated when specified in both calls. For all other\n settings in <code>UserSettings</code>, the values specified in <code>CreateUserProfile</code>\n take precedence over those specified in <code>CreateDomain</code>.</p>"
}
},
"com.amazonaws.sagemaker#UtilizationMetric": {
"type": "float",
"traits": {
"smithy.api#range": {
"min": 0.0
}
}
},
"com.amazonaws.sagemaker#ValidationFraction": {
"type": "float",
"traits": {
Expand Down

0 comments on commit bf7623f

Please sign in to comment.