From f5a20eb381af0685d8b6fffad085ded87f4cf5e3 Mon Sep 17 00:00:00 2001
From: Alexey Volkov
Date: Tue, 22 Aug 2023 17:14:35 -0700
Subject: [PATCH] fix: LLM - De-hardcoded the `max_output_tokens` default value for the `CodeGenerationModel`

The previous default value (128) was inconsistent with the service-side
default values of the models: `code-bison` has 1024 and `code-gecko` has 64.
Moreover, the previous default was out of range for the `code-gecko` model.
This CL fixes these issues. The SDK now relies on the service-side default
values when the user does not pass a parameter value explicitly.

What can change: when using the `code-bison` model, the default value of
`max_output_tokens` effectively increases from 128 to 1024 (the current
service-side default value).

PiperOrigin-RevId: 559266968
---
 tests/unit/aiplatform/test_language_models.py | 10 ++--------
 vertexai/language_models/_language_models.py  |  7 +++----
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py
index c3836f88cb..2106b0815f 100644
--- a/tests/unit/aiplatform/test_language_models.py
+++ b/tests/unit/aiplatform/test_language_models.py
@@ -2187,9 +2187,6 @@ def test_code_generation(self):
         # Validating the parameters
         predict_temperature = 0.1
         predict_max_output_tokens = 100
-        default_max_output_tokens = (
-            language_models.CodeGenerationModel._DEFAULT_MAX_OUTPUT_TOKENS
-        )
         stop_sequences = ["\n"]
 
         with mock.patch.object(
@@ -2213,7 +2210,7 @@ def test_code_generation(self):
             )
             prediction_parameters = mock_predict.call_args[1]["parameters"]
             assert "temperature" not in prediction_parameters
-            assert prediction_parameters["maxOutputTokens"] == default_max_output_tokens
+            assert "maxOutputTokens" not in prediction_parameters
 
     def test_code_completion(self):
         """Tests code completion with the code generation model."""
@@ -2255,9 +2252,6 @@ def test_code_completion(self):
         # Validating the parameters
         predict_temperature = 0.1
         predict_max_output_tokens = 100
-        default_max_output_tokens = (
-            language_models.CodeGenerationModel._DEFAULT_MAX_OUTPUT_TOKENS
-        )
 
         with mock.patch.object(
             target=prediction_service_client.PredictionServiceClient,
@@ -2278,7 +2272,7 @@ def test_code_completion(self):
             )
             prediction_parameters = mock_predict.call_args[1]["parameters"]
             assert "temperature" not in prediction_parameters
-            assert prediction_parameters["maxOutputTokens"] == default_max_output_tokens
+            assert "maxOutputTokens" not in prediction_parameters
 
     def test_code_generation_model_predict_streaming(self):
         """Tests the TextGenerationModel.predict_streaming method."""
diff --git a/vertexai/language_models/_language_models.py b/vertexai/language_models/_language_models.py
index ef80a778e2..1a646ca601 100644
--- a/vertexai/language_models/_language_models.py
+++ b/vertexai/language_models/_language_models.py
@@ -1686,14 +1686,13 @@ class CodeGenerationModel(_LanguageModel):
 
     _INSTANCE_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/predict/instance/code_generation_1.0.0.yaml"
     _LAUNCH_STAGE = _model_garden_models._SDK_GA_LAUNCH_STAGE
-    _DEFAULT_MAX_OUTPUT_TOKENS = 128
 
     def _create_prediction_request(
         self,
         prefix: str,
         suffix: Optional[str] = None,
         *,
-        max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
+        max_output_tokens: Optional[int] = None,
         temperature: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
     ) -> _PredictionRequest:
@@ -1732,7 +1731,7 @@ def predict(
         prefix: str,
         suffix: Optional[str] = None,
         *,
-        max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
+        max_output_tokens: Optional[int] = None,
         temperature: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
     ) -> "TextGenerationResponse":
@@ -1771,7 +1770,7 @@ def predict_streaming(
         prefix: str,
         suffix: Optional[str] = None,
         *,
-        max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
+        max_output_tokens: Optional[int] = None,
         temperature: Optional[float] = None,
     ) -> Iterator[TextGenerationResponse]:
         """Predicts the code based on previous code.
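
Usage sketch (not part of the patch): a minimal illustration of how the change in the commit message above surfaces to SDK callers. The project ID, location, and model version are placeholders, and the snippet assumes a configured Google Cloud environment with the Vertex AI SDK installed.

    import vertexai
    from vertexai.language_models import CodeGenerationModel

    # Placeholder project and location; replace with real values.
    vertexai.init(project="my-project", location="us-central1")

    model = CodeGenerationModel.from_pretrained("code-bison@001")

    # No max_output_tokens argument: the request now omits "maxOutputTokens",
    # so the service-side default applies (1024 for code-bison, 64 for
    # code-gecko at the time of this change), instead of the old SDK-side 128.
    response = model.predict(prefix="Write a Python function that reverses a string.")
    print(response.text)

    # Passing the parameter explicitly still overrides the service-side default.
    response = model.predict(
        prefix="Write a Python function that reverses a string.",
        max_output_tokens=256,
    )
    print(response.text)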