Skip to content

Commit

Permalink
chore: fix CustomJob system tests
Browse files: browse the repository at this point in the history
PiperOrigin-RevId: 636035683
  • Loading branch information
jaycee-li authored and Copybara-Service committed May 22, 2024
1 parent 2f08f53 commit 9936514
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 33 deletions.
4 changes: 2 additions & 2 deletions tests/system/aiplatform/e2e_base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -206,8 +206,8 @@ def tear_down_resources(self, shared_state: Dict[str, Any]):
resource.delete(delete_backing_tensorboard_runs=True)
else:
resource.delete()
- except exceptions.GoogleAPIError as e:
- logging.error(f"Could not delete resource: {resource} due to: {e}")
+ except (exceptions.GoogleAPIError, RuntimeError) as e:
+ logging.exception(f"Could not delete resource: {resource} due to: {e}")

@pytest.fixture(scope="session")
def event_loop(event_loop):
Expand Down
64 changes: 33 additions & 31 deletions tests/system/aiplatform/test_custom_job.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,14 +61,8 @@ class TestCustomJob(e2e_base.TestEndToEnd):
_temp_prefix = "temp-vertex-sdk-custom-job"

def setup_class(cls):
- cls._backing_tensorboard = aiplatform.Tensorboard.create(
- project=e2e_base._PROJECT,
- location=e2e_base._LOCATION,
- display_name=cls._make_display_name("tensorboard")[:64],
- )
-
- cls._experiment_name = cls._temp_prefix + "-experiment"
- cls._experiment_run_name = cls._temp_prefix + "-experiment-run"
+ cls._experiment_name = cls._make_display_name("experiment")[:60]
+ cls._experiment_run_name = cls._make_display_name("experiment-run")[:60]

project_number = resource_manager_utils.get_project_number(e2e_base._PROJECT)
cls._service_account = f"{project_number}-compute@developer.gserviceaccount.com"
Expand All @@ -90,9 +84,10 @@ def test_from_local_script_prebuilt_container(self, shared_state):
container_uri=_PREBUILT_CONTAINER_IMAGE,
requirements=["scikit-learn", "pandas"],
)
- custom_job.run()
-
- shared_state["resources"].append(custom_job)
+ try:
+ custom_job.run()
+ finally:
+ shared_state["resources"].append(custom_job)

assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED

Expand All @@ -112,9 +107,10 @@ def test_from_local_script_custom_container(self, shared_state):
container_uri=_CUSTOM_CONTAINER_IMAGE,
requirements=["scikit-learn", "pandas"],
)
- custom_job.run()
-
- shared_state["resources"].append(custom_job)
+ try:
+ custom_job.run()
+ finally:
+ shared_state["resources"].append(custom_job)

assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED

Expand All @@ -125,10 +121,8 @@ def test_from_local_script_enable_autolog_prebuilt_container(self, shared_state)
location=e2e_base._LOCATION,
staging_bucket=shared_state["staging_bucket_name"],
experiment=self._experiment_name,
- experiment_tensorboard=self._backing_tensorboard,
)

- shared_state["resources"].append(self._backing_tensorboard)
shared_state["resources"].append(
aiplatform.metadata.metadata._experiment_tracker.experiment
)
Expand All @@ -143,13 +137,16 @@ def test_from_local_script_enable_autolog_prebuilt_container(self, shared_state)
enable_autolog=True,
)

- custom_job.run(
- experiment=self._experiment_name,
- experiment_run=self._experiment_run_name,
- service_account=self._service_account,
- )
-
- shared_state["resources"].append(custom_job)
+ try:
+ with aiplatform.start_run(self._experiment_run_name) as run:
+ shared_state["resources"].append(run)
+ custom_job.run(
+ experiment=self._experiment_name,
+ experiment_run=run,
+ service_account=self._service_account,
+ )
+ finally:
+ shared_state["resources"].append(custom_job)

assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED

Expand All @@ -159,8 +156,6 @@ def test_from_local_script_enable_autolog_custom_container(self, shared_state):
project=e2e_base._PROJECT,
location=e2e_base._LOCATION,
staging_bucket=shared_state["staging_bucket_name"],
- experiment=self._experiment_name,
- experiment_tensorboard=self._backing_tensorboard,
)

display_name = self._make_display_name("custom-job")
Expand All @@ -173,11 +168,18 @@ def test_from_local_script_enable_autolog_custom_container(self, shared_state):
enable_autolog=True,
)

- custom_job.run(
- experiment=self._experiment_name,
- service_account=self._service_account,
- )
-
- shared_state["resources"].append(custom_job)
+ # Let the job auto-create the experiment run.
+ try:
+ custom_job.run(
+ experiment=self._experiment_name,
+ service_account=self._service_account,
+ )
+ finally:
+ shared_state["resources"].append(custom_job)
+ experiment_run_resource = aiplatform.Context.get(
+ custom_job.job_spec.experiment_run
+ )
+ if experiment_run_resource:
+ shared_state["resources"].append(experiment_run_resource)

assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED

0 comments on commit 9936514

Please sign in to comment.