Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(PipelineJob): use name as output only field #719

Merged
merged 3 commits into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 3 additions & 22 deletions google/cloud/aiplatform/pipeline_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@
[gca_pipeline_state_v1beta1.PipelineState.PIPELINE_STATE_FAILED]
)

# Vertex AI Pipelines service API job name relative name prefix pattern.
_JOB_NAME_PATTERN = "{parent}/pipelineJobs/{job_id}"

# Pattern for valid names used as a Vertex resource name.
_VALID_NAME_PATTERN = re.compile("^[a-z][-a-z0-9]{0,127}$")

Expand Down Expand Up @@ -178,19 +175,18 @@ def __init__(
)

pipeline_name = pipeline_job["pipelineSpec"]["pipelineInfo"]["name"]
job_id = job_id or "{pipeline_name}-{timestamp}".format(
self.job_id = job_id or "{pipeline_name}-{timestamp}".format(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this is existing code, but why isn't there a way to get the job_id directly from the service instead of deriving it in the client SDK.

This seems like it could break if the service changes how job ID's are generated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think job_id was provided by the user, or generated by the system and only used during run command but not the instance generation process. That's why we need to store it somewhere, since it wasn't used during the instance creation.

Name shouldn't be passed in since it was an output-only field and was wrongly passed in previously. Although the services generated the name using the same way.

pipeline_name=re.sub("[^-0-9a-z]+", "-", pipeline_name.lower())
.lstrip("-")
.rstrip("-"),
timestamp=_get_current_time().strftime("%Y%m%d%H%M%S"),
)
if not _VALID_NAME_PATTERN.match(job_id):
if not _VALID_NAME_PATTERN.match(self.job_id):
raise ValueError(
"Generated job ID: {} is illegal as a Vertex pipelines job ID. "
"Expecting an ID following the regex pattern "
'"[a-z][-a-z0-9]{{0,127}}"'.format(job_id)
)
job_name = _JOB_NAME_PATTERN.format(parent=self._parent, job_id=job_id)

builder = pipeline_utils.PipelineRuntimeConfigBuilder.from_job_spec_json(
pipeline_job
Expand All @@ -206,7 +202,6 @@ def __init__(

self._gca_resource = gca_pipeline_job_v1beta1.PipelineJob(
display_name=display_name,
name=job_name,
pipeline_spec=pipeline_job["pipelineSpec"],
labels=labels,
runtime_config=runtime_config,
Expand All @@ -215,18 +210,6 @@ def __init__(
),
)

def _assert_gca_resource_is_available(self) -> None:
# TODO(b/193800063) Change this to name after this fix
if not getattr(self._gca_resource, "create_time", None):
raise RuntimeError(
f"{self.__class__.__name__} resource has not been created."
+ (
f" Resource failed with: {self._exception}"
if self._exception
else ""
)
)

@base.optional_sync()
def run(
self,
Expand Down Expand Up @@ -257,12 +240,10 @@ def run(

_LOGGER.log_create_with_lro(self.__class__)

# PipelineJob.name is not used by pipeline service
pipeline_job_id = self._gca_resource.name.split("/")[-1]
self._gca_resource = self.api_client.create_pipeline_job(
parent=self._parent,
pipeline_job=self._gca_resource,
pipeline_job_id=pipeline_job_id,
pipeline_job_id=self.job_id,
)

_LOGGER.log_create_complete_with_getter(
Expand Down
1 change: 0 additions & 1 deletion tests/unit/aiplatform/test_pipeline_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,6 @@ def test_run_call_pipeline_service_create(
# Construct expected request
expected_gapic_pipeline_job = gca_pipeline_job_v1beta1.PipelineJob(
display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
name=_TEST_PIPELINE_JOB_NAME,
pipeline_spec={
"components": {},
"pipelineInfo": _TEST_PIPELINE_JOB_SPEC["pipelineSpec"]["pipelineInfo"],
Expand Down