feat(client-sagemaker): SageMaker Inference Recommender now decouples…

… from Model Registry and can accept a Model Name to invoke an inference recommendations job; Inference Recommender now provides CPU/Memory Utilization metrics data in the recommendation output.
aws · Jan 25, 2023 · bf7623f · bf7623f
1 parent f699098
commit bf7623f
Show file tree

Hide file tree

Showing 4 changed files with 128 additions and 10 deletions.
diff --git a/clients/client-sagemaker/src/models/models_1.ts b/clients/client-sagemaker/src/models/models_1.ts
@@ -2618,6 +2618,13 @@ export interface RecommendationJobContainerConfig {
    * <p>A list of the instance types that are used to generate inferences in real-time.</p>
    */
   SupportedInstanceTypes?: string[];
+
+  /**
+   * <p>Specifies the name and shape of the expected data inputs for your trained model with a JSON dictionary form.
+   *          This field is used for optimizing your model using SageMaker Neo. For more information, see
+   *          <a href="https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_InputConfig.html#sagemaker-Type-InputConfig-DataInputConfig">DataInputConfig</a>.</p>
+   */
+  DataInputConfig?: string;
 }
 
 /**
@@ -2738,7 +2745,7 @@ export interface RecommendationJobInputConfig {
   /**
    * <p>The Amazon Resource Name (ARN) of a versioned model package.</p>
    */
-  ModelPackageVersionArn: string | undefined;
+  ModelPackageVersionArn?: string;
 
   /**
    * <p>Specifies the maximum duration of the job, in seconds.</p>
@@ -2816,6 +2823,11 @@ export interface RecommendationJobInputConfig {
    * <p>Inference Recommender provisions SageMaker endpoints with access to VPC in the inference recommendation job.</p>
    */
   VpcConfig?: RecommendationJobVpcConfig;
+
+  /**
+   * <p>The name of the created model.</p>
+   */
+  ModelName?: string;
 }
 
 export enum RecommendationJobType {

diff --git a/clients/client-sagemaker/src/models/models_2.ts b/clients/client-sagemaker/src/models/models_2.ts
@@ -3337,6 +3337,20 @@ export interface RecommendationMetrics {
    * <p>The expected model latency at maximum invocations per minute for the instance.</p>
    */
   ModelLatency: number | undefined;
+
+  /**
+   * <p>The expected CPU utilization at maximum invocations per minute for the instance.</p>
+   *          <p>
+   *             <code>NaN</code> indicates that the value is not available.</p>
+   */
+  CpuUtilization?: number;
+
+  /**
+   * <p>The expected memory utilization at maximum invocations per minute for the instance.</p>
+   *          <p>
+   *             <code>NaN</code> indicates that the value is not available.</p>
+   */
+  MemoryUtilization?: number;
 }
 
 /**
@@ -3372,6 +3386,11 @@ export interface ModelConfiguration {
    * <p>Defines the environment parameters that include key, value types, and values.</p>
    */
   EnvironmentParameters?: EnvironmentParameter[];
+
+  /**
+   * <p>The name of the compilation job used to create the recommended model artifacts.</p>
+   */
+  CompilationJobName?: string;
 }
 
 /**
@@ -3392,6 +3411,11 @@ export interface InferenceRecommendation {
    * <p>Defines the model configuration.</p>
    */
   ModelConfiguration: ModelConfiguration | undefined;
+
+  /**
+   * <p>The recommendation ID which uniquely identifies each recommendation.</p>
+   */
+  RecommendationId?: string;
 }
 
 export enum RecommendationJobStatus {

diff --git a/clients/client-sagemaker/src/protocols/Aws_json1_1.ts b/clients/client-sagemaker/src/protocols/Aws_json1_1.ts
@@ -13687,13 +13687,19 @@ const deserializeAws_json1_1ListInferenceRecommendationsJobStepsCommandError = a
     body: await parseErrorBody(output.body, context),
   };
   const errorCode = loadRestJsonErrorCode(output, parsedOutput.body);
-  const parsedBody = parsedOutput.body;
-  throwDefaultError({
-    output,
-    parsedBody,
-    exceptionCtor: __BaseException,
-    errorCode,
-  });
+  switch (errorCode) {
+    case "ResourceNotFound":
+    case "com.amazonaws.sagemaker#ResourceNotFound":
+      throw await deserializeAws_json1_1ResourceNotFoundResponse(parsedOutput, context);
+    default:
+      const parsedBody = parsedOutput.body;
+      throwDefaultError({
+        output,
+        parsedBody,
+        exceptionCtor: __BaseException,
+        errorCode,
+      });
+  }
 };
 
 export const deserializeAws_json1_1ListLabelingJobsCommand = async (
@@ -24427,6 +24433,7 @@ const serializeAws_json1_1RecommendationJobContainerConfig = (
   context: __SerdeContext
 ): any => {
   return {
+    ...(input.DataInputConfig != null && { DataInputConfig: input.DataInputConfig }),
     ...(input.Domain != null && { Domain: input.Domain }),
     ...(input.Framework != null && { Framework: input.Framework }),
     ...(input.FrameworkVersion != null && { FrameworkVersion: input.FrameworkVersion }),
@@ -24457,6 +24464,7 @@ const serializeAws_json1_1RecommendationJobInputConfig = (
     }),
     ...(input.Endpoints != null && { Endpoints: serializeAws_json1_1Endpoints(input.Endpoints, context) }),
     ...(input.JobDurationInSeconds != null && { JobDurationInSeconds: input.JobDurationInSeconds }),
+    ...(input.ModelName != null && { ModelName: input.ModelName }),
     ...(input.ModelPackageVersionArn != null && { ModelPackageVersionArn: input.ModelPackageVersionArn }),
     ...(input.ResourceLimit != null && {
       ResourceLimit: serializeAws_json1_1RecommendationJobResourceLimit(input.ResourceLimit, context),
@@ -32466,6 +32474,7 @@ const deserializeAws_json1_1InferenceRecommendation = (
       output.ModelConfiguration != null
         ? deserializeAws_json1_1ModelConfiguration(output.ModelConfiguration, context)
         : undefined,
+    RecommendationId: __expectString(output.RecommendationId),
   } as any;
 };
 
@@ -34196,6 +34205,7 @@ const deserializeAws_json1_1ModelClientConfig = (output: any, context: __SerdeCo
 
 const deserializeAws_json1_1ModelConfiguration = (output: any, context: __SerdeContext): ModelConfiguration => {
   return {
+    CompilationJobName: __expectString(output.CompilationJobName),
     EnvironmentParameters:
       output.EnvironmentParameters != null
         ? deserializeAws_json1_1EnvironmentParameters(output.EnvironmentParameters, context)
@@ -36703,6 +36713,7 @@ const deserializeAws_json1_1RecommendationJobContainerConfig = (
   context: __SerdeContext
 ): RecommendationJobContainerConfig => {
   return {
+    DataInputConfig: __expectString(output.DataInputConfig),
     Domain: __expectString(output.Domain),
     Framework: __expectString(output.Framework),
     FrameworkVersion: __expectString(output.FrameworkVersion),
@@ -36752,6 +36763,7 @@ const deserializeAws_json1_1RecommendationJobInputConfig = (
         : undefined,
     Endpoints: output.Endpoints != null ? deserializeAws_json1_1Endpoints(output.Endpoints, context) : undefined,
     JobDurationInSeconds: __expectInt32(output.JobDurationInSeconds),
+    ModelName: __expectString(output.ModelName),
     ModelPackageVersionArn: __expectString(output.ModelPackageVersionArn),
     ResourceLimit:
       output.ResourceLimit != null
@@ -36875,7 +36887,9 @@ const deserializeAws_json1_1RecommendationMetrics = (output: any, context: __Ser
   return {
     CostPerHour: __limitedParseFloat32(output.CostPerHour),
     CostPerInference: __limitedParseFloat32(output.CostPerInference),
+    CpuUtilization: __limitedParseFloat32(output.CpuUtilization),
     MaxInvocations: __expectInt32(output.MaxInvocations),
+    MemoryUtilization: __limitedParseFloat32(output.MemoryUtilization),
     ModelLatency: __expectInt32(output.ModelLatency),
   } as any;
 };

diff --git a/codegen/sdk-codegen/aws-models/sagemaker.json b/codegen/sdk-codegen/aws-models/sagemaker.json
@@ -25771,6 +25771,12 @@
             "smithy.api#documentation": "<p>Defines the model configuration.</p>",
             "smithy.api#required": {}
           }
+        },
+        "RecommendationId": {
+          "target": "com.amazonaws.sagemaker#String",
+          "traits": {
+            "smithy.api#documentation": "<p>The recommendation ID which uniquely identifies each recommendation.</p>"
+          }
         }
       },
       "traits": {
@@ -30548,6 +30554,11 @@
       "output": {
         "target": "com.amazonaws.sagemaker#ListInferenceRecommendationsJobStepsResponse"
       },
+      "errors": [
+        {
+          "target": "com.amazonaws.sagemaker#ResourceNotFound"
+        }
+      ],
       "traits": {
         "smithy.api#documentation": "<p>Returns a list of the subtasks for an Inference Recommender job.</p>\n         <p>The supported subtasks are benchmarks, which evaluate the performance of your model on different instance types.</p>",
         "smithy.api#paginated": {
@@ -35334,6 +35345,12 @@
           "traits": {
             "smithy.api#documentation": "<p>Defines the environment parameters that includes key, value types, and values.</p>"
           }
+        },
+        "CompilationJobName": {
+          "target": "com.amazonaws.sagemaker#RecommendationJobCompilationJobName",
+          "traits": {
+            "smithy.api#documentation": "<p>The name of the compilation job used to create the recommended model artifacts.</p>"
+          }
         }
       },
       "traits": {
@@ -43631,6 +43648,16 @@
         "smithy.api#pattern": "^arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:inference-recommendations-job/"
       }
     },
+    "com.amazonaws.sagemaker#RecommendationJobCompilationJobName": {
+      "type": "string",
+      "traits": {
+        "smithy.api#length": {
+          "min": 1,
+          "max": 63
+        },
+        "smithy.api#pattern": "^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$"
+      }
+    },
     "com.amazonaws.sagemaker#RecommendationJobCompiledOutputConfig": {
       "type": "structure",
       "members": {
@@ -43689,12 +43716,28 @@
           "traits": {
             "smithy.api#documentation": "<p>A list of the instance types that are used to generate inferences in real-time.</p>"
           }
+        },
+        "DataInputConfig": {
+          "target": "com.amazonaws.sagemaker#RecommendationJobDataInputConfig",
+          "traits": {
+            "smithy.api#documentation": "<p>Specifies the name and shape of the expected data inputs for your trained model with a JSON dictionary form.\n         This field is used for optimizing your model using SageMaker Neo. For more information, see\n         <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_InputConfig.html#sagemaker-Type-InputConfig-DataInputConfig\">DataInputConfig</a>.</p>"
+          }
         }
       },
       "traits": {
         "smithy.api#documentation": "<p>Specifies mandatory fields for running an Inference Recommender job directly in the\n         <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateInferenceRecommendationsJob.html\">CreateInferenceRecommendationsJob</a>\n         API. The fields specified in <code>ContainerConfig</code> override the corresponding fields in the model package. Use\n      <code>ContainerConfig</code> if you want to specify these fields for the recommendation job but don't want to edit them in your model package.</p>"
       }
     },
+    "com.amazonaws.sagemaker#RecommendationJobDataInputConfig": {
+      "type": "string",
+      "traits": {
+        "smithy.api#length": {
+          "min": 1,
+          "max": 1024
+        },
+        "smithy.api#pattern": "^[\\S\\s]+$"
+      }
+    },
     "com.amazonaws.sagemaker#RecommendationJobDescription": {
       "type": "string",
       "traits": {
@@ -43736,8 +43779,7 @@
         "ModelPackageVersionArn": {
           "target": "com.amazonaws.sagemaker#ModelPackageArn",
           "traits": {
-            "smithy.api#documentation": "<p>The Amazon Resource Name (ARN) of a versioned model package.</p>",
-            "smithy.api#required": {}
+            "smithy.api#documentation": "<p>The Amazon Resource Name (ARN) of a versioned model package.</p>"
           }
         },
         "JobDurationInSeconds": {
@@ -43787,6 +43829,12 @@
           "traits": {
             "smithy.api#documentation": "<p>Inference Recommender provisions SageMaker endpoints with access to VPC in the inference recommendation job.</p>"
           }
+        },
+        "ModelName": {
+          "target": "com.amazonaws.sagemaker#ModelName",
+          "traits": {
+            "smithy.api#documentation": "<p>The name of the created model.</p>"
+          }
         }
       },
       "traits": {
@@ -44054,6 +44102,18 @@
             "smithy.api#documentation": "<p>The expected model latency at maximum invocation per minute for the instance.</p>",
             "smithy.api#required": {}
           }
+        },
+        "CpuUtilization": {
+          "target": "com.amazonaws.sagemaker#UtilizationMetric",
+          "traits": {
+            "smithy.api#documentation": "<p>The expected CPU utilization at maximum invocations per minute for the instance.</p>\n         <p>\n            <code>NaN</code> indicates that the value is not available.</p>"
+          }
+        },
+        "MemoryUtilization": {
+          "target": "com.amazonaws.sagemaker#UtilizationMetric",
+          "traits": {
+            "smithy.api#documentation": "<p>The expected memory utilization at maximum invocations per minute for the instance.</p>\n         <p>\n            <code>NaN</code> indicates that the value is not available.</p>"
+          }
         }
       },
       "traits": {
@@ -55918,6 +55978,14 @@
         "smithy.api#documentation": "<p>A collection of settings that apply to users of Amazon SageMaker Studio. These settings are\n      specified when the <code>CreateUserProfile</code> API is called, and as <code>DefaultUserSettings</code>\n      when the <code>CreateDomain</code> API is called.</p>\n         <p>\n            <code>SecurityGroups</code> is aggregated when specified in both calls. For all other\n     settings in <code>UserSettings</code>, the values specified in <code>CreateUserProfile</code>\n     take precedence over those specified in <code>CreateDomain</code>.</p>"
       }
     },
+    "com.amazonaws.sagemaker#UtilizationMetric": {
+      "type": "float",
+      "traits": {
+        "smithy.api#range": {
+          "min": 0.0
+        }
+      }
+    },
     "com.amazonaws.sagemaker#ValidationFraction": {
       "type": "float",
       "traits": {