In [13]:
# Step 2 — Verify environment (Workbench)

import sys, subprocess, os

def pip_install(pkgs):
    """Install or upgrade packages into the running kernel's environment.

    Runs ``<sys.executable> -m pip install -U <pkgs...>`` so the packages are
    installed for the same interpreter that executes this notebook.

    Args:
        pkgs: A single requirement specifier string (e.g. ``"kfp"``) or an
            iterable of specifiers (e.g. ``["kfp", "numpy<2"]``).

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # A bare string would otherwise be unpacked into single characters.
    requirements = [pkgs] if isinstance(pkgs, str) else list(pkgs)
    if not requirements:
        # pip fails when given nothing to install; treat that as a no-op.
        return
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", *requirements])

# Install/upgrade the pipeline toolchain for this kernel. pip_install passes
# -U, so every run upgrades to the newest versions matching these specifiers.
pipeline_requirements = [
    "google-cloud-aiplatform",
    "kfp",
    "google-cloud-pipeline-components",
    "numpy<2",
]
pip_install(pipeline_requirements)

# Smoke-test that the freshly installed SDKs import cleanly.
import google.cloud.aiplatform as aiplatform
import kfp
from kfp import dsl
from kfp.dsl import component

# Report the interpreter and SDK versions actually in use.
python_version = sys.version.split()[0]
print(f"Python: {python_version}")
print(f"aiplatform: {aiplatform.__version__}")
print(f"kfp: {kfp.__version__}")

Python: 3.10.19
aiplatform: 1.138.0
kfp: 2.15.2


In [14]:
# Step 3 — Initialize Vertex AI SDK

from kfp import dsl, compiler
from kfp.dsl import component, Output, Input, Model, Metrics
from google.cloud import aiplatform
import time

from google_cloud_pipeline_components.v1.custom_job import create_custom_training_job_from_component
from google_cloud_pipeline_components.v1.model import ModelUploadOp
from google_cloud_pipeline_components.v1.endpoint import ModelDeployOp
from google_cloud_pipeline_components.types import artifact_types

# Fill these in. PROJECT_ID and LOCATION are the single source of truth: the
# service account and image URIs below are derived from them so that editing
# one value cannot leave the others pointing at a stale project or region.
PROJECT_ID = "vertex-ai-487907"
LOCATION = "us-central1"
STAGING_BUCKET = "gs://vertex-mlops-vinzur"
PIPELINE_ROOT = f"{STAGING_BUCKET}/phase3-custom-container"
SERVICE_ACCOUNT = f"vertex-pipeline-sa@{PROJECT_ID}.iam.gserviceaccount.com"

# Training and serving images.
# NOTE(review): assumes the Artifact Registry repository lives in the same
# project and region as the pipeline — hard-code the host if it does not.
TRAIN_IMAGE_URI = f"{LOCATION}-docker.pkg.dev/{PROJECT_ID}/vertex-mlops/train-sklearn:3"
SERVE_IMAGE_URI = f"{LOCATION}-docker.pkg.dev/{PROJECT_ID}/vertex-mlops/serve-sklearn:1"

# NOTE(review): this is still a placeholder ("..." segments). Replace it with
# the full resource name of the existing endpoint before running the pipeline.
ENDPOINT_RESOURCE_NAME = "projects/.../locations/us-central1/endpoints/..."

# Set project, region and staging bucket defaults for later aiplatform calls.
aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)


In [15]:
# Container-based training component (runs your training image)

from kfp import dsl
from kfp.dsl import Output, Model

@dsl.container_component
def train_container(n_rows: int, model_gcs_dir: str, model: Output[Model]):
    """Describe the training step that runs the image at ``TRAIN_IMAGE_URI``.

    Args:
        n_rows: Value forwarded to the container as ``--n_rows``.
        model_gcs_dir: ``gs://`` destination forwarded as ``--model_gcs_dir``.
        model: Output model artifact; its path is forwarded as ``--model_dir``.

    Returns:
        A ``dsl.ContainerSpec`` naming the image and its arguments.
    """
    # No command is set here, so the flags are handed to the image's own
    # entrypoint.
    trainer_flags = [
        "--model_dir", model.path,          # artifact path as seen inside the container
        "--model_gcs_dir", model_gcs_dir,   # caller-chosen gs:// location
        "--n_rows", n_rows,
    ]
    return dsl.ContainerSpec(image=TRAIN_IMAGE_URI, args=trainer_flags)
# Wrap train_container with create_custom_training_job_from_component so the
# step is submitted with the job settings below.
CustomTrainOp = create_custom_training_job_from_component(
    component_spec=train_container,
    display_name="phase3-custom-container-train",
    # Identity and output location for the job.
    service_account=SERVICE_ACCOUNT,
    base_output_directory=PIPELINE_ROOT,
    # Compute: one replica on a single machine type.
    replica_count=1,
    machine_type="n1-standard-4",
)

In [16]:
# Evaluation component (reads model artifact, logs accuracy, returns accuracy)

from typing import NamedTuple
from kfp.dsl import component, Input, Output, Model, Metrics

@component(
    base_image="python:3.10-slim",
    packages_to_install=["numpy<2", "pandas", "scikit-learn", "joblib"],
)
def evaluate(model: Input[Model], metrics: Output[Metrics]) -> NamedTuple("Outputs", [("accuracy", float)]):
    """Score the trained classifier on a fixed synthetic evaluation set.

    Loads ``model.joblib`` from the model artifact's path, generates 300
    seeded synthetic rows with features ``x1``/``x2`` and a binary label,
    and measures prediction accuracy.

    Args:
        model: Input model artifact whose path contains ``model.joblib``.
        metrics: Output metrics artifact; receives the ``accuracy`` metric.

    Returns:
        A one-element tuple holding the accuracy as a float.
    """
    import joblib
    import numpy as np
    import pandas as pd
    from sklearn.metrics import accuracy_score

    classifier = joblib.load(f"{model.path}/model.joblib")

    # Seeded generator keeps the evaluation set identical across runs. The
    # draw order (x1, x2, noise) determines the data and must not change.
    sample_count = 300
    generator = np.random.default_rng(123)
    feature_1 = generator.normal(size=sample_count)
    feature_2 = generator.normal(size=sample_count)
    label_noise = generator.normal(scale=0.3, size=sample_count)
    labels = (feature_1 + 0.5 * feature_2 + label_noise > 0).astype(int)

    eval_frame = pd.DataFrame({"x1": feature_1, "x2": feature_2, "y": labels})

    predictions = classifier.predict(eval_frame[["x1", "x2"]])
    accuracy = float(accuracy_score(eval_frame["y"], predictions))

    # Record the score on the metrics artifact and echo it to the step log.
    metrics.log_metric("accuracy", accuracy)
    print("accuracy:", accuracy)

    return (accuracy,)

In [17]:
# Pipeline (gate + upload model + deploy to existing endpoint)

from google_cloud_pipeline_components.v1.model import ModelUploadOp
from google_cloud_pipeline_components.v1.endpoint import ModelDeployOp
from google_cloud_pipeline_components.types import artifact_types
from kfp import dsl

@dsl.pipeline(name="phase3-custom-container-single-endpoint")
def phase3_pipeline(n_rows: int = 500, min_accuracy: float = 0.80):
    """Train, evaluate, and deploy the model only if it clears the accuracy gate.

    Args:
        n_rows: Passed through to the training step.
        min_accuracy: Threshold the evaluation accuracy must meet or exceed
            for the upload and deploy steps to run.
    """
    # Per-run model location under the pipeline root, keyed by the job ID.
    run_model_dir = f"{PIPELINE_ROOT}/models/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}"

    training = CustomTrainOp(n_rows=n_rows, model_gcs_dir=run_model_dir)

    evaluation = evaluate(model=training.outputs["model"])

    accuracy_ok = evaluation.outputs["accuracy"] >= min_accuracy
    with dsl.If(accuracy_ok, name="deploy_if_good"):
        # 1) Import the model files in GCS as an UnmanagedContainerModel tied
        #    to the serving image.
        serving_spec = {"containerSpec": {"imageUri": SERVE_IMAGE_URI}}
        serving_model = dsl.importer(
            artifact_uri=run_model_dir,
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata=serving_spec,
        )

        # 2) Upload that unmanaged model via ModelUploadOp.
        registered = ModelUploadOp(
            project=PROJECT_ID,
            location=LOCATION,
            display_name="phase3-sklearn-model",
            unmanaged_container_model=serving_model.outputs["artifact"],
        )

        # 3) Import the existing endpoint as a VertexEndpoint artifact.
        target_endpoint = dsl.importer(
            artifact_uri=ENDPOINT_RESOURCE_NAME,
            artifact_class=artifact_types.VertexEndpoint,
            metadata={"resourceName": ENDPOINT_RESOURCE_NAME},
        )

        # 4) Deploy the uploaded model to that endpoint on a single replica.
        ModelDeployOp(
            model=registered.outputs["model"],
            endpoint=target_endpoint.outputs["artifact"],
            deployed_model_display_name="phase3-sklearn-deployed",
            dedicated_resources_machine_type="n1-standard-2",
            dedicated_resources_min_replica_count=1,
            dedicated_resources_max_replica_count=1,
            traffic_split={"0": "100"},
        )

In [None]:
# Compile and Run Pipeline

PIPELINE_YAML = "phase3_custom_container.yaml"

# Compile the pipeline function into a spec file in the working directory.
compiler.Compiler().compile(pipeline_func=phase3_pipeline, package_path=PIPELINE_YAML)

# Submit the compiled spec; the timestamp suffix keeps display names distinct.
run_parameters = {"n_rows": 500, "min_accuracy": 0.80}
job = aiplatform.PipelineJob(
    template_path=PIPELINE_YAML,
    pipeline_root=PIPELINE_ROOT,
    display_name=f"phase3-custom-container-{int(time.time())}",
    parameter_values=run_parameters,
    enable_caching=False,
)

# sync=True makes this call wait for the run before the state is printed.
job.run(sync=True, service_account=SERVICE_ACCOUNT)
print("State:", job.state)

Creating PipelineJob
PipelineJob created. Resource name: projects/208722280565/locations/us-central1/pipelineJobs/phase3-custom-container-single-endpoint-20260220190910
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/208722280565/locations/us-central1/pipelineJobs/phase3-custom-container-single-endpoint-20260220190910')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/phase3-custom-container-single-endpoint-20260220190910?project=208722280565
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase3-custom-container-single-endpoint-20260220190910 current state:
PipelineState.PIPELINE_STATE_PENDING
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase3-custom-container-single-endpoint-20260220190910 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/208722280565/locations/us-central1/pipelineJobs/phase3-custom-container-single-end