fix: LLM - De-hardcoded the max_output_tokens default value for the `CodeGenerationModel`

The previous default value (128) was inconsistent with the service-side default values of the models: `code-bison` uses 1024 and `code-gecko` uses 64.
Moreover, the old default was out of range for the `code-gecko` model.

This CL fixes these issues.

The SDK now relies on the service-side default values when the user does not pass a parameter value explicitly.

What can change: when using the `code-bison` model, the effective default value of `max_output_tokens` increases from 128 to 1024 (the current service-side default).
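
For illustration, a minimal sketch of the resulting behavior from the caller's side (the model name and prompt are illustrative assumptions; only the parameter handling reflects this change):

from vertexai.language_models import CodeGenerationModel

model = CodeGenerationModel.from_pretrained("code-bison@001")

# No max_output_tokens passed: the SDK no longer injects a client-side
# default, "maxOutputTokens" is omitted from the request, and the
# service-side default (currently 1024 for code-bison) applies.
response = model.predict(prefix="Write a function that reverses a string.")

# An explicit value is still sent to the service, as before.
response = model.predict(
    prefix="Write a function that reverses a string.",
    max_output_tokens=256,
)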

PiperOrigin-RevId: 559266968
Ark-kun authored and Copybara-Service committed Aug 23, 2023
1 parent d11b8e6 commit f5a20eb
Showing 2 changed files with 5 additions and 12 deletions.
tests/unit/aiplatform/test_language_models.py (10 changes: 2 additions & 8 deletions)
@@ -2187,9 +2187,6 @@ def test_code_generation(self):
         # Validating the parameters
         predict_temperature = 0.1
         predict_max_output_tokens = 100
-        default_max_output_tokens = (
-            language_models.CodeGenerationModel._DEFAULT_MAX_OUTPUT_TOKENS
-        )
         stop_sequences = ["\n"]

         with mock.patch.object(
@@ -2213,7 +2210,7 @@
             )
         prediction_parameters = mock_predict.call_args[1]["parameters"]
         assert "temperature" not in prediction_parameters
-        assert prediction_parameters["maxOutputTokens"] == default_max_output_tokens
+        assert "maxOutputTokens" not in prediction_parameters

     def test_code_completion(self):
         """Tests code completion with the code generation model."""
@@ -2255,9 +2252,6 @@ def test_code_completion(self):
         # Validating the parameters
         predict_temperature = 0.1
         predict_max_output_tokens = 100
-        default_max_output_tokens = (
-            language_models.CodeGenerationModel._DEFAULT_MAX_OUTPUT_TOKENS
-        )

         with mock.patch.object(
             target=prediction_service_client.PredictionServiceClient,
@@ -2278,7 +2272,7 @@
             )
         prediction_parameters = mock_predict.call_args[1]["parameters"]
         assert "temperature" not in prediction_parameters
-        assert prediction_parameters["maxOutputTokens"] == default_max_output_tokens
+        assert "maxOutputTokens" not in prediction_parameters

     def test_code_generation_model_predict_streaming(self):
         """Tests the TextGenerationModel.predict_streaming method."""
vertexai/language_models/_language_models.py (7 changes: 3 additions & 4 deletions)
@@ -1686,14 +1686,13 @@ class CodeGenerationModel(_LanguageModel):
     _INSTANCE_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/predict/instance/code_generation_1.0.0.yaml"

     _LAUNCH_STAGE = _model_garden_models._SDK_GA_LAUNCH_STAGE
-    _DEFAULT_MAX_OUTPUT_TOKENS = 128

     def _create_prediction_request(
         self,
         prefix: str,
         suffix: Optional[str] = None,
         *,
-        max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
+        max_output_tokens: Optional[int] = None,
         temperature: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
     ) -> _PredictionRequest:
@@ -1732,7 +1731,7 @@ def predict(
         prefix: str,
         suffix: Optional[str] = None,
         *,
-        max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
+        max_output_tokens: Optional[int] = None,
         temperature: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
     ) -> "TextGenerationResponse":
@@ -1771,7 +1770,7 @@ def predict_streaming(
         prefix: str,
         suffix: Optional[str] = None,
         *,
-        max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
+        max_output_tokens: Optional[int] = None,
         temperature: Optional[float] = None,
     ) -> Iterator[TextGenerationResponse]:
         """Predicts the code based on previous code.
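
Conceptually, the request construction now only includes the field when the caller supplies a value; a minimal sketch of that pattern (the dict name and surrounding structure are assumptions, not the SDK's exact code):

prediction_parameters = {}
if max_output_tokens is not None:
    # Only send the field when the caller sets it; otherwise the service
    # applies its own model-specific default (1024 for code-bison,
    # 64 for code-gecko).
    prediction_parameters["maxOutputTokens"] = max_output_tokens
if temperature is not None:
    prediction_parameters["temperature"] = temperature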
