feat: Adding tpu_topology to Vertex SDK
PiperOrigin-RevId: 625144116
vertex-sdk-bot authored and copybara-github committed Apr 16, 2024
1 parent 9c11ea5 commit 423c764
Showing 2 changed files with 74 additions and 0 deletions.
google/cloud/aiplatform/models.py (32 additions, 0 deletions)
@@ -772,6 +772,7 @@ def deploy(
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
@@ -833,6 +834,9 @@ def deploy(
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
@@ -896,6 +900,7 @@ def deploy(
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
@@ -919,6 +924,7 @@ def _deploy(
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
@@ -977,6 +983,9 @@ def _deploy(
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
@@ -1026,6 +1035,7 @@ def _deploy(
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
@@ -1056,6 +1066,7 @@ def _deploy_call(
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
@@ -1123,6 +1134,9 @@ def _deploy_call(
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
@@ -1250,6 +1264,9 @@ def _deploy_call(
[autoscaling_metric_spec]
)

if tpu_topology is not None:
machine_spec.tpu_topology = tpu_topology

dedicated_resources.machine_spec = machine_spec
deployed_model.dedicated_resources = dedicated_resources

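This hunk carries the only behavioral change in the file: the topology is copied onto the MachineSpec only when the caller supplies one, so request protos for non-TPU deployments are unchanged. A minimal sketch of the resulting spec, assuming the v1 GAPIC types and reusing the machine type and topology strings from the unit test below:

from google.cloud.aiplatform_v1.types import MachineSpec

# Mirrors the guard in _deploy_call: tpu_topology is attached only when given.
machine_spec = MachineSpec(
    machine_type="ct5lp-hightpu-4t",  # TPU machine type from the unit test below
)
tpu_topology = "2x2"  # topology string from the unit test below
if tpu_topology is not None:
    machine_spec.tpu_topology = tpu_topology
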
@@ -2440,6 +2457,7 @@ def deploy(
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
@@ -2487,6 +2505,9 @@ def deploy(
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
@@ -2534,6 +2555,7 @@ def deploy(
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
@@ -3442,6 +3464,7 @@ def deploy(
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
@@ -3505,6 +3528,9 @@ def deploy(
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
@@ -3601,6 +3627,7 @@ def deploy(
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
@@ -3627,6 +3654,7 @@ def _deploy(
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
@@ -3687,6 +3715,9 @@ def _deploy(
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
@@ -3777,6 +3808,7 @@ def _deploy(
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
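End to end, the models.py changes let a caller pass the topology straight through Model.deploy(). A minimal usage sketch, assuming a model has already been uploaded; the project, region, and model ID are hypothetical placeholders, and valid machine-type/topology combinations should be checked against the Cloud TPU documentation:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-west1")  # hypothetical project and region

model = aiplatform.Model("my-model-id")  # hypothetical model resource ID
endpoint = model.deploy(
    machine_type="ct5lp-hightpu-4t",  # TPU machine type used in the unit tests
    tpu_topology="2x2",  # topology string; required when the deployment spans multiple TPU hosts
)
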
tests/unit/aiplatform/test_models.py (42 additions, 0 deletions)
@@ -132,6 +132,9 @@
_TEST_STARTING_REPLICA_COUNT = 2
_TEST_MAX_REPLICA_COUNT = 12

_TEST_TPU_MACHINE_TYPE = "ct5lp-hightpu-4t"
_TEST_TPU_TOPOLOGY = "2x2"

_TEST_BATCH_SIZE = 16

_TEST_PIPELINE_RESOURCE_NAME = (
@@ -2077,6 +2080,45 @@ def test_deploy_no_endpoint_dedicated_resources(self, deploy_model_mock, sync):
timeout=None,
)

@pytest.mark.usefixtures(
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
)
@pytest.mark.parametrize("sync", [True, False])
def test_deploy_no_endpoint_with_tpu_topology(self, deploy_model_mock, sync):
test_model = models.Model(_TEST_ID)
test_model._gca_resource.supported_deployment_resources_types.append(
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
)
test_endpoint = test_model.deploy(
machine_type=_TEST_TPU_MACHINE_TYPE,
tpu_topology=_TEST_TPU_TOPOLOGY,
sync=sync,
deploy_request_timeout=None,
)

if not sync:
test_endpoint.wait()

expected_machine_spec = gca_machine_resources.MachineSpec(
machine_type=_TEST_TPU_MACHINE_TYPE,
tpu_topology=_TEST_TPU_TOPOLOGY,
)
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
machine_spec=expected_machine_spec, min_replica_count=1, max_replica_count=1
)
expected_deployed_model = gca_endpoint.DeployedModel(
dedicated_resources=expected_dedicated_resources,
model=test_model.resource_name,
display_name=None,
)
deploy_model_mock.assert_called_once_with(
endpoint=test_endpoint.resource_name,
deployed_model=expected_deployed_model,
traffic_split={"0": 100},
metadata=(),
timeout=None,
)

@pytest.mark.usefixtures(
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
)