# BQML Model to Vertex AI Endpoint Kubeflow Pipeline using Google Components

In [None]:
# Install or upgrade the packages this notebook depends on (the KFP SDK,
# Google Cloud Pipeline Components, and the Google Cloud client libraries).
!pip install --upgrade kfp google-cloud-aiplatform google-cloud-bigquery google-cloud-storage shapely google-cloud-pipeline-components

# Required Imports and Variables

In [None]:
import kfp
from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp, BigqueryExportModelJobOp
from google_cloud_pipeline_components.v1.endpoint import EndpointCreateOp, ModelDeployOp
from google_cloud_pipeline_components.v1.model import ModelGetOp

# Read the active project from the local gcloud configuration. The `!` shell
# capture is IPython/Colab syntax; the first captured line is used as the ID.
PROJECT_ID = ! gcloud config get-value project
PROJECT_ID = PROJECT_ID[0]

# If gcloud reported "(unset)", no project was picked up automatically:
# fall back to the value entered manually here.
if PROJECT_ID == "(unset)":
  PROJECT_ID = "[your-project-id]" # @param {type:"string"}

print(PROJECT_ID)

# BigQuery resources for the MPG example. The dataset and table must already
# exist; the pipeline's training query reads BQ_TABLE and creates BQ_MODEL.
BQ_DATASET = "mpg_dataset" # @param {type:"string"}
BQ_TABLE = "mpg" # @param {type:"string"}
BQ_MODEL = "mpg_model" # @param {type:"string"}
BQ_LOCATION = "US"  # @param {type:"string"}

# Vertex AI settings: region plus display names for the model and endpoint.
REGION = "us-central1"  # @param {type:"string"}
MODEL_DISPLAY_NAME = "bqml_mpg_model" # @param {type:"string"}
ENDPOINT_DISPLAY_NAME = "bqml_mpg_endpoint" # @param {type:"string"}

# Cloud Storage bucket that holds the exported model and the pipeline root.
# The bucket must already exist.
BUCKET_NAME = f"{PROJECT_ID}-mpg-model"

# Location under the bucket used as the pipeline root.
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline-root/"

## Custom KFP Component to upload the model to the Vertex AI Model Registry

Note: I wrote this because I couldn't get the Google-provided component to work. Oh well.

In [None]:
@dsl.component(base_image="python:3.9", packages_to_install=["google-cloud-aiplatform"])
def upload_model_custom(
    project_id: str,
    model_display_name: str,
    gcs_model_path: str,
    location: str = "us-central1",
    serving_container_image_uri: str = "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest",
) -> str:
    """Upload an exported model to the Vertex AI Model Registry via the Python SDK.

    Args:
        project_id: Google Cloud project that will own the uploaded model.
        model_display_name: Display name given to the model in the registry.
        gcs_model_path: Cloud Storage URI of the directory holding the model
            artifacts.
        location: Vertex AI region to upload into. Defaults to "us-central1",
            the value that was previously hard-coded.
        serving_container_image_uri: Container image used to serve predictions.
            Defaults to the TensorFlow 2.8 CPU prediction image that was
            previously hard-coded.

    Returns:
        The last path segment of the model's resource name (the model ID),
        which is the form passed on to the ModelGetOp task.
    """
    # Imported inside the function because a KFP component body runs in its
    # own container, where only packages_to_install are available.
    from google.cloud import aiplatform

    # Initialize the Vertex AI SDK for the target project and region.
    aiplatform.init(project=project_id, location=location)

    # Register the exported artifacts as a Vertex AI model.
    model = aiplatform.Model.upload(
        display_name=model_display_name,
        artifact_uri=gcs_model_path,
        serving_container_image_uri=serving_container_image_uri,
    )

    full_resource_name = model.resource_name
    print(f"Full resource name: {full_resource_name}")

    # Keep only the trailing segment of the resource name (the model ID);
    # this is what the ModelGetOp task is given as model_name.
    numeric_id = full_resource_name.split("/")[-1]
    print(f"Numeric model ID: {numeric_id}")

    return numeric_id

## Define the Pipeline

This pipeline uses Google-provided Kubeflow components.

In [None]:
@dsl.pipeline(
    name="bigquery-vertex-ai-pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def bigquery_vertex_pipeline(project_id: str):
    """Train a BQML model, export it, register it in Vertex AI and deploy it.

    Steps: create the BigQuery ML linear-regression model, export it to Cloud
    Storage, upload it to the Vertex AI Model Registry, create an endpoint, and
    deploy the model to that endpoint.

    Args:
        project_id: Pipeline parameter supplied at submission time. NOTE: the
            steps below currently use the notebook-level PROJECT_ID constant
            (bound when the pipeline is compiled), so this parameter is
            accepted but not read.
    """
    # **1️⃣ Train BigQueryML Model**
    train_model_task = BigqueryCreateModelJobOp(
        project=PROJECT_ID,
        location=BQ_LOCATION,
        query=f"""
        CREATE OR REPLACE MODEL `{PROJECT_ID}.{BQ_DATASET}.{BQ_MODEL}`
        OPTIONS(model_type='LINEAR_REG', input_label_cols=['MPG'])
        AS SELECT Cylinders, Displacement, Horsepower, Weight, Acceleration, Model_Year, Origin, MPG
        FROM `{PROJECT_ID}.{BQ_DATASET}.{BQ_TABLE}`
        WHERE MPG IS NOT NULL;
        """
    )

    # **2️⃣ Export Model to Cloud Storage**
    export_model_task = BigqueryExportModelJobOp(
        project=PROJECT_ID,
        location=BQ_LOCATION,
        model=train_model_task.outputs["model"],
        model_destination_path=f"gs://{BUCKET_NAME}/models/{BQ_MODEL}/"
    ).after(train_model_task)

    # **3️⃣ Upload Model to Vertex AI Model Registry**
    upload_model_task = upload_model_custom(
        project_id=PROJECT_ID,
        model_display_name=MODEL_DISPLAY_NAME,
        gcs_model_path=export_model_task.outputs["exported_model_path"]
    ).after(export_model_task)

    # Turn the model ID returned by the upload step into a model artifact
    # that ModelDeployOp can consume.
    model_get_task = ModelGetOp(
        project=PROJECT_ID,
        location=REGION,
        model_name=upload_model_task.outputs["Output"]
    ).after(upload_model_task)


    # **4️⃣ Create a Vertex AI Endpoint**
    # Pin the endpoint to REGION so it is created in the same region the
    # model is looked up in by ModelGetOp above.
    create_endpoint_task = EndpointCreateOp(
        project=PROJECT_ID,
        location=REGION,
        display_name=ENDPOINT_DISPLAY_NAME
    )

    # **5️⃣ Deploy Model to the Endpoint**
    deploy_model_task = ModelDeployOp(
        model=model_get_task.outputs["model"],
        endpoint=create_endpoint_task.outputs["endpoint"],
        dedicated_resources_machine_type="n1-standard-2",
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    ).after(model_get_task, create_endpoint_task)


## Compile the pipeline into a JSON file

In [None]:
# Serialize the pipeline function into a local JSON pipeline definition.
_compiler = kfp.compiler.Compiler()
_compiler.compile(
    package_path="bqml-mpg-vertex-pipeline.json",
    pipeline_func=bigquery_vertex_pipeline,
)

## Submit the Pipeline job to Vertex AI Pipelines

In [None]:
import google.cloud.aiplatform as aip

# Credentials: set the GOOGLE_APPLICATION_CREDENTIALS environment variable to
# the path of your service account before running this cell.
aip.init(location=REGION, project=PROJECT_ID)

# Build the job from the compiled definition; caching is turned off for
# this run.
pipeline_job = aip.PipelineJob(
    template_path="bqml-mpg-vertex-pipeline.json",
    display_name="bqml-mpg-vertex-pipeline",
    parameter_values={"project_id": PROJECT_ID},
    pipeline_root=PIPELINE_ROOT,
    enable_caching=False,
)

# Send the job to Vertex AI Pipelines.
pipeline_job.submit()

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/117114503109/locations/us-central1/pipelineJobs/bigquery-vertex-ai-pipeline-20250207142204
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/117114503109/locations/us-central1/pipelineJobs/bigquery-vertex-ai-pipeline-20250207142204')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/bigquery-vertex-ai-pipeline-20250207142204?project=117114503109


## Let's test the Endpoint

In [None]:
# ID of the deployed endpoint to send test predictions to.
# Replace with the actual deployed endpoint ID.
ENDPOINT_ID = "6730270102802923520"  # @param {type:"string"}

# Project *number* (not the project ID string) that owns the endpoint.
# Replace with your own project number.
PROJECT_NUMBER = "117114503109"  # @param {type:"string"}

## Predict function taken from the Vertex AI Docs

see: https://github.com/googleapis/python-aiplatform/blob/main/samples/snippets/prediction_service/predict_custom_trained_model_sample.py



In [None]:
from typing import Dict, List, Union
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def predict_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-central1",
    api_endpoint: Union[str, None] = None,
):
    """Send instances to a deployed Vertex AI endpoint and print the predictions.

    Args:
        project: Project used to build the endpoint resource path.
        endpoint_id: ID of the deployed endpoint.
        instances: A single instance dict, or a list of instance dicts. Each
            instance should conform to the deployed model's prediction input
            schema.
        location: Region of the endpoint.
        api_endpoint: Regional API host to call. When omitted it is derived
            from ``location`` as ``"<location>-aiplatform.googleapis.com"``,
            so the host always matches the endpoint's region.

    Returns:
        None. The deployed model ID and each prediction are printed.
    """
    # The AI Platform services require regional API endpoints; derive the host
    # from the location unless the caller supplied one explicitly.
    if api_endpoint is None:
        api_endpoint = f"{location}-aiplatform.googleapis.com"
    client_options = {"api_endpoint": api_endpoint}

    # This client only needs to be created once and can be reused for
    # multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)

    # Accept either one instance or a list, then convert each dict into a
    # protobuf Value for the request.
    instance_dicts = instances if isinstance(instances, list) else [instances]
    instance_values = [
        json_format.ParseDict(instance_dict, Value())
        for instance_dict in instance_dicts
    ]

    # No prediction parameters are sent; an empty Value is passed.
    parameters = json_format.ParseDict({}, Value())

    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instance_values, parameters=parameters
    )

    print("response")
    print(" deployed_model_id:", response.deployed_model_id)
    # The predictions are a google.protobuf.Value representation of the
    # model's predictions.
    for prediction in response.predictions:
        print(" prediction:", prediction)

## Run a Prediction using the Deployed Endpoint

In [None]:
# Two sample cars; the keys are the feature columns selected by the training
# query (everything except the MPG label).
four_cylinder_car = {
    "Cylinders": 4,
    "Displacement": 140.0,
    "Horsepower": 90.0,
    "Weight": 2264.0,
    "Acceleration": 15.5,
    "Model_Year": 82,
    "Origin": 1,
}
six_cylinder_car = {
    "Cylinders": 6,
    "Displacement": 200.0,
    "Horsepower": 110.0,
    "Weight": 2600.0,
    "Acceleration": 18.5,
    "Model_Year": 78,
    "Origin": 0,
}

# Request payload in the {"instances": [...]} shape.
input_data = {"instances": [four_cylinder_car, six_cylinder_car]}


# Send both instances to the deployed endpoint and print the predictions.
predict_custom_trained_model_sample(
    endpoint_id=ENDPOINT_ID,
    project=PROJECT_NUMBER,
    instances=input_data["instances"],
    location=REGION,
)

response
 deployed_model_id: 1562974470581256192
 prediction: [31.73883785083203]
 prediction: [25.41370428058034]
