From 518f0dcd532870465684fe5976afb0d905538d2b Mon Sep 17 00:00:00 2001 From: Gary Wang Date: Fri, 14 Apr 2023 20:23:17 +0000 Subject: [PATCH 1/4] fix: downgrade sklearn fw version to a regex format that ir supports --- tests/integ/test_inference_recommender.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/test_inference_recommender.py b/tests/integ/test_inference_recommender.py index d2795aad2b..2118427788 100644 --- a/tests/integ/test_inference_recommender.py +++ b/tests/integ/test_inference_recommender.py @@ -39,7 +39,7 @@ IR_SKLEARN_DATA = os.path.join(IR_DIR, "sample.csv") IR_SKLEARN_CONTENT_TYPE = ["text/csv"] IR_SKLEARN_FRAMEWORK = "SAGEMAKER-SCIKIT-LEARN" -IR_SKLEARN_FRAMEWORK_VERSION = "1.0-1" +IR_SKLEARN_FRAMEWORK_VERSION = "0.20.0" def retry_and_back_off(right_size_fn): From a13179f35694c367f15f7c753ae35f8c748f42b8 Mon Sep 17 00:00:00 2001 From: Gary Wang Date: Fri, 14 Apr 2023 20:53:41 +0000 Subject: [PATCH 2/4] Revert "fix: downgrade sklearn fw version to a regex format that ir supports" This reverts commit ed44bd8f4e2dd548bb7d77199edaf935b7110823. --- tests/integ/test_inference_recommender.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/test_inference_recommender.py b/tests/integ/test_inference_recommender.py index 2118427788..d2795aad2b 100644 --- a/tests/integ/test_inference_recommender.py +++ b/tests/integ/test_inference_recommender.py @@ -39,7 +39,7 @@ IR_SKLEARN_DATA = os.path.join(IR_DIR, "sample.csv") IR_SKLEARN_CONTENT_TYPE = ["text/csv"] IR_SKLEARN_FRAMEWORK = "SAGEMAKER-SCIKIT-LEARN" -IR_SKLEARN_FRAMEWORK_VERSION = "0.20.0" +IR_SKLEARN_FRAMEWORK_VERSION = "1.0-1" def retry_and_back_off(right_size_fn): From 80ba644a887bfe6e028810dbb5d8478296d6495a Mon Sep 17 00:00:00 2001 From: Gary Wang Date: Wed, 23 Aug 2023 17:45:50 +0000 Subject: [PATCH 3/4] only pick from realtime inference for now --- .../inference_recommender_mixin.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/inference_recommender/inference_recommender_mixin.py b/src/sagemaker/inference_recommender/inference_recommender_mixin.py index 04f0905b40..a7b646f0fd 100644 --- a/src/sagemaker/inference_recommender/inference_recommender_mixin.py +++ b/src/sagemaker/inference_recommender/inference_recommender_mixin.py @@ -306,7 +306,7 @@ def _update_params_for_right_size( initial_instance_count = self.inference_recommendations[0]["EndpointConfiguration"][ "InitialInstanceCount" ] - return (instance_type, initial_instance_count) + return self._filter_recommendations_for_realtime() def _update_params_for_recommendation_id( self, @@ -610,3 +610,15 @@ def _search_recommendation(self, recommendation_list, inference_recommendation_i ), None, ) + + # TODO: until we have bandwidth to integrate right_size + deploy with serverless + def _filter_recommendations_for_realtime(self): + instance_type = None + initial_instance_count = None + for recommendations in self.inference_recommendations: + if not "serverlessConfig" in recommendations["EndpointConfiguration"]: + instance_type = recommendations["EndpointConfiguration"]["InstanceType"] + initial_instance_count = recommendations["EndpointConfiguration"][ + "InitialInstanceCount" + ] + return (instance_type, initial_instance_count) From e51332260533c071745442e44d6e35fb04625098 Mon Sep 17 00:00:00 2001 From: Gary Wang Date: Wed, 23 Aug 2023 17:48:37 +0000 Subject: [PATCH 4/4] clean up --- .../inference_recommender/inference_recommender_mixin.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/sagemaker/inference_recommender/inference_recommender_mixin.py b/src/sagemaker/inference_recommender/inference_recommender_mixin.py index a7b646f0fd..9ec8fb7c05 100644 --- a/src/sagemaker/inference_recommender/inference_recommender_mixin.py +++ b/src/sagemaker/inference_recommender/inference_recommender_mixin.py @@ -302,10 +302,6 @@ def _update_params_for_right_size( ) return None - instance_type = self.inference_recommendations[0]["EndpointConfiguration"]["InstanceType"] - initial_instance_count = self.inference_recommendations[0]["EndpointConfiguration"][ - "InitialInstanceCount" - ] return self._filter_recommendations_for_realtime() def _update_params_for_recommendation_id(