feat: Add tune_model and deploy_tuned_model for TextEmbeddingModel.

PiperOrigin-RevId: 629886655
googleapis · May 2, 2024 · 42f5d6f · 42f5d6f
1 parent 021d59f
commit 42f5d6f
Show file tree

Hide file tree

Showing 2 changed files with 389 additions and 26 deletions.
diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py
@@ -101,7 +101,18 @@
         "prediction_schema_uri": "gs://google-cloud-aiplatform/schema/predict/prediction/text_generation_1.0.0.yaml",
     },
 }
-
+# Fake publisher-model record for textembedding-gecko@003; the embedding
+# tuning test wraps it in `PublisherModel` as the mocked lookup result.
+_TEXT_GECKO_PUBLISHER_MODEL_DICT = {
+    "name": "publishers/google/models/textembedding-gecko",
+    "version_id": "003",
+    "open_source_category": "PROPRIETARY",
+    "launch_stage": gca_publisher_model.PublisherModel.LaunchStage.GA,
+    "publisher_model_template": "projects/{user-project}/locations/{location}/publishers/google/models/textembedding-gecko@003",
+    "predict_schemata": {
+        "instance_schema_uri": "gs://google-cloud-aiplatform/schema/predict/instance/text_embedding_1.0.0.yaml",
+        # Bucket name spelled consistently with the two sibling schema URIs.
+        "parameters_schema_uri": "gs://google-cloud-aiplatform/schema/predict/params/text_embedding_1.0.0.yaml",
+        "prediction_schema_uri": "gs://google-cloud-aiplatform/schema/predict/prediction/text_embedding_1.0.0.yaml",
+    },
+}
 _CHAT_BISON_PUBLISHER_MODEL_DICT = {
     "name": "publishers/google/models/chat-bison",
     "version_id": "001",
@@ -528,6 +539,105 @@ def reverse_string_2(s):""",
     },
 )
 
+_EMBEDDING_MODEL_TUNING_PIPELINE_SPEC = {  # Stub KFP pipeline spec for embedding tuning tests.
+    "components": {},  # Left empty in this stub.
+    "deploymentSpec": {},  # Left empty in this stub.
+    "pipelineInfo": {
+        "description": "Pipeline definition for v1.1.x embedding tuning pipelines.",
+        "name": "tune-text-embedding-model",
+    },
+    "root": {
+        "dag": {"tasks": {}},  # No tasks defined; only the inputs below are populated.
+        "inputDefinitions": {
+            "parameters": {  # Entries without "defaultValue" also carry no "isOptional" flag.
+                "accelerator_count": {
+                    "defaultValue": 4,
+                    "description": "how many accelerators to use when running the\ncontainer.",
+                    "isOptional": True,
+                    "parameterType": "NUMBER_INTEGER",
+                },
+                "accelerator_type": {
+                    "defaultValue": "NVIDIA_TESLA_V100",
+                    "description": "the accelerator type for running the trainer component.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "base_model_version_id": {
+                    "defaultValue": "textembedding-gecko@001",
+                    "description": "which base model to tune. This may be any stable\nnumbered version, for example `textembedding-gecko@001`.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "batch_size": {
+                    "defaultValue": 128,
+                    "description": "training batch size.",
+                    "isOptional": True,
+                    "parameterType": "NUMBER_INTEGER",
+                },
+                "corpus_path": {
+                    "description": "the GCS path to the corpus data location.",
+                    "parameterType": "STRING",
+                },
+                "iterations": {
+                    "defaultValue": 1000,
+                    "description": "the number of steps to perform fine-tuning.",
+                    "isOptional": True,
+                    "parameterType": "NUMBER_INTEGER",
+                },
+                "location": {
+                    "defaultValue": "us-central1",
+                    "description": "GCP region to run the pipeline.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "machine_type": {
+                    "defaultValue": "n1-standard-16",
+                    "description": "the type of the machine to run the trainer component. For\nmore details about this input config, see:\nhttps://cloud.google.com/vertex-ai/docs/training/configure-compute.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "model_display_name": {
+                    "defaultValue": "tuned-text-embedding-model",
+                    "description": "output model display name.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "project": {
+                    "description": "user's project id.",
+                    "parameterType": "STRING",
+                },
+                "queries_path": {
+                    "description": "the GCS path to the queries location.",
+                    "parameterType": "STRING",
+                },
+                "task_type": {
+                    "defaultValue": "DEFAULT",
+                    "description": "the task type expected to be used during inference. Valid\nvalues are `DEFAULT`, `RETRIEVAL_QUERY`, `RETRIEVAL_DOCUMENT`,\n`SEMANTIC_SIMILARITY`, `CLASSIFICATION`, and `CLUSTERING`.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "test_label_path": {
+                    "defaultValue": "",
+                    "description": "the GCS path to the test label data location.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "train_label_path": {
+                    "description": "the GCS path to the train label data location.",
+                    "parameterType": "STRING",
+                },
+                "validation_label_path": {
+                    "defaultValue": "",
+                    "description": "The GCS path to the validation label data location.",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+            }
+        },
+    },
+    "schemaVersion": "2.1.0",
+    "sdkVersion": "kfp-2.6.0",
+}
 _TEST_PIPELINE_SPEC = {
     "components": {},
     "pipelineInfo": {"name": "evaluation-llm-text-generation-pipeline"},
@@ -641,6 +751,9 @@ def reverse_string_2(s):""",
 }
 
 
+_EMBEDDING_MODEL_TUNING_PIPELINE_SPEC_JSON = json.dumps(
+    _EMBEDDING_MODEL_TUNING_PIPELINE_SPEC,  # JSON-string form, used for `job_spec` parametrization.
+)
 _TEST_PIPELINE_SPEC_JSON = json.dumps(
     _TEST_PIPELINE_SPEC,
 )
@@ -1460,6 +1573,18 @@ def mock_request_urlopen(request: str) -> Tuple[str, mock.MagicMock]:
         yield request.param, mock_urlopen
 
 
+@pytest.fixture
+def mock_request_urlopen_gecko(request: str) -> Tuple[str, mock.MagicMock]:
+    """Patches `urlopen` so reads decode to the embedding tuning spec JSON.
+
+    Yields:
+        A tuple of the indirect fixture parameter (`request.param`) and the
+        patched `urlopen` mock.
+    """
+    spec_json = json.dumps(_EMBEDDING_MODEL_TUNING_PIPELINE_SPEC)
+    with mock.patch.object(urllib_request, "urlopen") as mock_urlopen:
+        # Wire up the chain urlopen(...).read().decode() -> spec_json.
+        fake_decode = mock.MagicMock(return_value=spec_json)
+        fake_read = mock.MagicMock()
+        fake_read.return_value.decode = fake_decode
+        mock_urlopen.return_value.read = fake_read
+        yield request.param, mock_urlopen
+
+
 @pytest.fixture
 def mock_request_urlopen_rlhf(request: str) -> Tuple[str, mock.MagicMock]:
     data = _TEST_RLHF_PIPELINE_SPEC
@@ -1528,6 +1653,21 @@ def get_endpoint_mock():
         yield get_endpoint_mock
 
 
+@pytest.fixture
+def mock_deploy_tuned_embedding_model(get_endpoint_mock):
+    """Patches `deploy_tuned_model` on the tunable text embedding mixin.
+
+    The patched method returns a mock pre-populated with the test model
+    resource name, the test endpoint name, and the `get_endpoint_mock` fixture.
+
+    Yields:
+        The mock that replaces `deploy_tuned_model`.
+    """
+    with mock.patch.object(
+        _language_models._TunableTextEmbeddingModelMixin, "deploy_tuned_model"
+    ) as mock_deploy:
+        deployed_model = mock_deploy.return_value
+        deployed_model._model_id = (
+            test_constants.ModelConstants._TEST_MODEL_RESOURCE_NAME
+        )
+        deployed_model._endpoint_name = (
+            test_constants.EndpointConstants._TEST_ENDPOINT_NAME
+        )
+        deployed_model._endpoint = get_endpoint_mock
+        yield mock_deploy
+
+
 @pytest.fixture
 def mock_get_tuned_model(get_endpoint_mock):
     with mock.patch.object(
@@ -2134,6 +2274,66 @@ def test_text_generation_response_repr(self):
         assert "blocked" in response_repr
         assert "Violent" in response_repr
 
+    @pytest.mark.parametrize(
+        "job_spec",
+        [_EMBEDDING_MODEL_TUNING_PIPELINE_SPEC_JSON],
+    )
+    @pytest.mark.parametrize(
+        "mock_request_urlopen_gecko",
+        ["https://us-central1-kfp.pkg.dev/proj/repo/pack/latest"],
+        indirect=True,
+    )
+    def test_tune_text_embedding_model(
+        self,
+        mock_pipeline_service_create,
+        mock_pipeline_job_get,
+        mock_pipeline_bucket_exists,
+        job_spec,
+        mock_load_yaml_and_json,
+        mock_gcs_from_string,
+        mock_gcs_upload,
+        mock_request_urlopen_gecko,
+        mock_deploy_tuned_embedding_model,
+    ):
+        """Checks tuning of the text embedding model and deployment of the result.
+
+        Calls `tune_model` on `textembedding-gecko@003` with the publisher
+        model lookup patched, verifies the `iterations` and `accelerator_type`
+        values passed to the pipeline-create mock, then checks the endpoint
+        name reported by `deploy_tuned_model`.
+        """
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+            encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
+        )
+        gecko_publisher_model = gca_publisher_model.PublisherModel(
+            _TEXT_GECKO_PUBLISHER_MODEL_DICT
+        )
+        with mock.patch.object(
+            target=model_garden_service_client.ModelGardenServiceClient,
+            attribute="get_publisher_model",
+            return_value=gecko_publisher_model,
+        ):
+            embedding_model = language_models.TextEmbeddingModel.from_pretrained(
+                "textembedding-gecko@003"
+            )
+            job = embedding_model.tune_model(
+                training_data="gs://bucket/training.tsv",
+                corpus_data="gs://bucket/corpus.jsonl",
+                queries_data="gs://bucket/queries.jsonl",
+                test_data="gs://bucket/test.tsv",
+                tuned_model_location="us-central1",
+                train_steps=10,
+                accelerator="NVIDIA_TESLA_A100",
+            )
+
+            # Inspect the keyword arguments of the pipeline-create call.
+            submitted_job = mock_pipeline_service_create.call_args[1]["pipeline_job"]
+            parameter_values = submitted_job.runtime_config.parameter_values
+            assert parameter_values["iterations"] == 10
+            assert parameter_values["accelerator_type"] == "NVIDIA_TESLA_A100"
+
+            # Testing the tuned model
+            deployed = job.deploy_tuned_model()
+            expected_endpoint = test_constants.EndpointConstants._TEST_ENDPOINT_NAME
+            assert deployed._endpoint_name == expected_endpoint
+
+
     @pytest.mark.parametrize(
         "job_spec",
         [_TEST_PIPELINE_SPEC_JSON, _TEST_PIPELINE_JOB],