diff --git a/src/sagemaker/inference_recommender/inference_recommender_mixin.py b/src/sagemaker/inference_recommender/inference_recommender_mixin.py index 04f0905b40..9ec8fb7c05 100644 --- a/src/sagemaker/inference_recommender/inference_recommender_mixin.py +++ b/src/sagemaker/inference_recommender/inference_recommender_mixin.py @@ -302,11 +302,7 @@ def _update_params_for_right_size( ) return None - instance_type = self.inference_recommendations[0]["EndpointConfiguration"]["InstanceType"] - initial_instance_count = self.inference_recommendations[0]["EndpointConfiguration"][ - "InitialInstanceCount" - ] - return (instance_type, initial_instance_count) + return self._filter_recommendations_for_realtime() def _update_params_for_recommendation_id( self, @@ -610,3 +606,15 @@ def _search_recommendation(self, recommendation_list, inference_recommendation_i ), None, ) + + # TODO: until we have bandwidth to integrate right_size + deploy with serverless + def _filter_recommendations_for_realtime(self): + instance_type = None + initial_instance_count = None + for recommendations in self.inference_recommendations: + if not "serverlessConfig" in recommendations["EndpointConfiguration"]: + instance_type = recommendations["EndpointConfiguration"]["InstanceType"] + initial_instance_count = recommendations["EndpointConfiguration"][ + "InitialInstanceCount" + ] + return (instance_type, initial_instance_count)