In [1]:
# Step 2 — Verify environment (Workbench)

import sys, subprocess, os

def pip_install(pkgs):
    """Install or upgrade packages into the interpreter running this kernel.

    Runs ``<sys.executable> -m pip install -U <requirements...>`` so the
    packages land in the same environment that executes the notebook.

    Args:
        pkgs: A single requirement string (e.g. ``"numpy<2"``) or an iterable
            of requirement strings.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # A bare string would otherwise be unpacked character by character
    # ("numpy" -> "n", "u", "m", ...), so treat it as one requirement.
    if isinstance(pkgs, str):
        pkgs = [pkgs]
    requirements = list(pkgs)
    if not requirements:
        # pip fails when invoked without any requirement; nothing to install.
        return
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "-U", *requirements]
    )

# Install the SDKs this notebook needs. The three SDK packages are pinned to
# the versions this notebook was last run with, so a fresh kernel reproduces
# the same environment instead of picking up whatever is newest at install
# time. numpy keeps its original "<2" upper bound.
pip_install([
    "google-cloud-aiplatform==1.138.0",
    "kfp==2.15.2",
    "google-cloud-pipeline-components==2.22.0",
    "numpy<2",
])

# Basic imports check
import google.cloud.aiplatform as aiplatform
import kfp
from kfp import dsl
from kfp.dsl import component

# Report the interpreter and SDK versions that were actually imported,
# one "<name>: <version>" line each.
print(
    "\n".join(
        f"{label}: {version}"
        for label, version in (
            ("Python", sys.version.split()[0]),
            ("aiplatform", aiplatform.__version__),
            ("kfp", kfp.__version__),
        )
    )
)


Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.138.0-py2.py3-none-any.whl.metadata (46 kB)
Collecting google-cloud-pipeline-components
  Downloading google_cloud_pipeline_components-2.22.0-py3-none-any.whl.metadata (5.7 kB)
Collecting numpy<2
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m89.0 MB/s[0m  [33m0:00:00[0m6m0:00:01[0m
[?25hDownloading google_cloud_aiplatform-1.138.0-py2.py3-none-any.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m88.9 MB/s[0m  [33m0:00:00[0m
[?25hDownloading google_cloud_pipeline_components-2.22.0-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m54.4 MB/s[0m  [33m0:00:00[0m
[?25hInsta



Python: 3.10.19
aiplatform: 1.138.0
kfp: 2.15.2


In [2]:
# Step 3 — Initialize Vertex AI SDK

# Deployment settings: edit these for your own project before running.
PROJECT_ID = "vertex-ai-487907"
LOCATION = "us-central1"
STAGING_BUCKET = "gs://vertex-mlops-vinzur"
# Pipeline artifacts go under a fixed sub-path of the staging bucket.
PIPELINE_ROOT = f"{STAGING_BUCKET}/pipeline-root"
# The service account lives in PROJECT_ID, so derive its address from it
# instead of repeating the project id literal (which could drift if only
# PROJECT_ID is edited). Override this if your account is in another project.
SERVICE_ACCOUNT = f"vertex-pipeline-sa@{PROJECT_ID}.iam.gserviceaccount.com"

# Set the SDK-wide defaults (project, region, staging bucket).
aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)

# Echo the effective settings so the cell output records what was used.
print("Initialized Vertex AI with:")
print(
    "\n".join(
        f"{label}: {value}"
        for label, value in (
            ("PROJECT_ID", PROJECT_ID),
            ("LOCATION", LOCATION),
            ("STAGING_BUCKET", STAGING_BUCKET),
            ("PIPELINE_ROOT", PIPELINE_ROOT),
            ("SERVICE_ACCOUNT", SERVICE_ACCOUNT),
        )
    )
)

Initialized Vertex AI with:
PROJECT_ID: vertex-ai-487907
LOCATION: us-central1
STAGING_BUCKET: gs://vertex-mlops-vinzur
PIPELINE_ROOT: gs://vertex-mlops-vinzur/pipeline-root
SERVICE_ACCOUNT: vertex-pipeline-sa@vertex-ai-487907.iam.gserviceaccount.com


In [3]:
# Step 4 — Build the 3-step pipeline (generate -> multiply -> print)

from kfp.dsl import OutputPath, InputPath
from kfp import compiler

@component(base_image="python:3.10-slim")
def generate_number(start: int) -> int:
    """Produce a number deterministically by adding a fixed offset of 7.

    Args:
        start: Integer the offset is added to.

    Returns:
        ``start + 7``.
    """
    generated = start + 7
    print(f"[generate_number] start={start} -> n={generated}")
    return generated

@component(base_image="python:3.10-slim")
def multiply_number(x: int, factor: int) -> int:
    """Multiply ``x`` by ``factor`` and log the computation.

    Args:
        x: Integer to scale.
        factor: Integer multiplier.

    Returns:
        The product ``x * factor``.
    """
    product = x * factor
    print(f"[multiply_number] x={x}, factor={factor} -> y={product}")
    return product

@component(base_image="python:3.10-slim")
def print_result(value: int) -> str:
    """Format the final value as a message, print it, and return it.

    Args:
        value: Integer result to report.

    Returns:
        The message ``"Final result is <value>"``.
    """
    summary = f"Final result is {value}"
    print(f"[print_result] {summary}")
    return summary

@dsl.pipeline(
    name="phase1-simple-3step",
    description="Phase 1: minimal 3-step pipeline to learn define/compile/run",
)
def phase1_pipeline(start: int = 5, factor: int = 3):
    """Chain generate_number -> multiply_number -> print_result.

    Args:
        start: Forwarded to ``generate_number``.
        factor: Multiplier forwarded to ``multiply_number``.
    """
    # Each task consumes the previous task's single output.
    generate_task = generate_number(start=start)
    multiply_task = multiply_number(x=generate_task.output, factor=factor)
    print_result(value=multiply_task.output)

# Serialize the pipeline definition to a YAML file; later cells hand this
# file to Vertex AI through `template_path`.
PIPELINE_YAML = "phase1_pipeline.yaml"
compiler.Compiler().compile(pipeline_func=phase1_pipeline, package_path=PIPELINE_YAML)

print(f"Compiled pipeline to: {PIPELINE_YAML}")

Compiled pipeline to: phase1_pipeline.yaml


In [4]:
# Step 5 — (Already compiled) but keep this cell to confirm paths you’ll use

import os

# Same relative path the compile step writes to; stop here if it is missing
# rather than letting the submit cell fail later.
PIPELINE_YAML = "phase1_pipeline.yaml"
yaml_present = os.path.exists(PIPELINE_YAML)
assert yaml_present, f"Missing {PIPELINE_YAML}. Re-run Step 4 compile."

# Show the resolved local path and the pipeline root the run will use.
print(f"Pipeline YAML found: {os.path.abspath(PIPELINE_YAML)}")
print(f"Pipeline root: {PIPELINE_ROOT}")

Pipeline YAML found: /home/jupyter/phase1_pipeline.yaml
Pipeline root: gs://vertex-mlops-vinzur/pipeline-root


In [8]:
# Step 6 — Submit the pipeline to Vertex AI (run it)

from google.cloud import aiplatform
import time

# Unix-timestamp suffix gives each submission its own display name.
JOB_DISPLAY_NAME = f"phase1-simple-3step-{int(time.time())}"

# Keys must match the arguments of phase1_pipeline.
run_parameters = {"start": 5, "factor": 3}

job = aiplatform.PipelineJob(
    template_path=PIPELINE_YAML,          # compiled YAML
    pipeline_root=PIPELINE_ROOT,          # gs://.../pipeline-root
    display_name=JOB_DISPLAY_NAME,
    parameter_values=run_parameters,
    enable_caching=False,                 # keep False while learning so every step really runs
)

# sync=True waits until the run finishes; pass sync=False to return immediately.
job.run(sync=True, service_account=SERVICE_ACCOUNT)

print("Pipeline state:", job.state)
print(f"Pipeline resource name: {job.resource_name}")

Creating PipelineJob
PipelineJob created. Resource name: projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219183821
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219183821')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/phase1-simple-3step-20260219183821?project=208722280565
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219183821 current state:
PipelineState.PIPELINE_STATE_PENDING
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219183821 current state:
PipelineState.PIPELINE_STATE_PENDING
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219183821 current state:
PipelineState.PIPELINE_STATE_PENDING
PipelineJob projects/208722280565/locations/us-

In [9]:
# Optional: Re-run with different parameters (so you can compare runs in the UI)

import time
# Distinct "rerun" display name so this run is easy to tell apart in the UI.
JOB_DISPLAY_NAME = f"phase1-simple-3step-rerun-{int(time.time())}"

# Different inputs from the first run, for side-by-side comparison.
rerun_parameters = {"start": 10, "factor": 4}

job2 = aiplatform.PipelineJob(
    template_path=PIPELINE_YAML,
    pipeline_root=PIPELINE_ROOT,
    display_name=JOB_DISPLAY_NAME,
    parameter_values=rerun_parameters,
    enable_caching=False,
)

# Wait for the run to finish before reporting its final state.
job2.run(sync=True, service_account=SERVICE_ACCOUNT)

print("Pipeline state:", job2.state)
print(f"Pipeline resource name: {job2.resource_name}")

Creating PipelineJob
PipelineJob created. Resource name: projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219184741
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219184741')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/phase1-simple-3step-20260219184741?project=208722280565
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219184741 current state:
PipelineState.PIPELINE_STATE_PENDING
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219184741 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase1-simple-3step-20260219184741 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/208722280565/locations/us-