In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Training/Pipelines PSC Interface Job Submission


<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/jbrache/vertex-ai-things/blob/main/codelabs/training-psc-interface-proxy/psc_interface_vertex_ai_job_submission.ipynb">
      <img src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2Fjbrache%2Fvertex-ai-things%2Fmain%2Fcodelabs%2Ftraining-psc-interface-proxy%2Fpsc_interface_vertex_ai_job_submission.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/jbrache/vertex-ai-things/main/codelabs/training-psc-interface-proxy/psc_interface_vertex_ai_job_submission.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/jbrache/vertex-ai-things/blob/main/codelabs/training-psc-interface-proxy/psc_interface_vertex_ai_job_submission.ipynb">
      <img width="32px" src="https://storage.googleapis.com/github-repo/generative-ai/logos/GitHub_Invertocat_Dark.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

This notebook demonstrates how to use the Private Service Connect (PSC) Interface resources created with Terraform to submit a Vertex AI Training job and a Vertex AI Pipelines job.

The jobs perform a wget from Vertex AI Training to the explicit proxy. This allows you to reach non-RFC 1918 VMs, such as the class-e-vm. An explicit proxy is not required for Vertex AI Pipelines to access rfc1918-vm, as its target is an RFC 1918 IP address.

Steps performed in the notebook:
* Configure project and resource information
* Submit Vertex AI Training Job
* Submit Vertex AI Pipelines Job
* Validate PSC Interface
* Validate Cloud Logging



## Get started

### Install Vertex AI SDK for Python and other required packages


In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform \
                                  google-cloud-storage \
                                  kfp \
                                  google-cloud-pipeline-components

### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# Run the Colab sign-in flow only when the google.colab module is already
# loaded in this interpreter; otherwise this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
import os

PROJECT_ID = "codelab-dev-jb0005"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
# Fall back to the GOOGLE_CLOUD_PROJECT environment variable when the form
# field is empty or still holds the placeholder text.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "")
    if not PROJECT_ID:
        # Stop here instead of carrying the literal string "None" into every
        # later gcloud command and API request.
        raise ValueError(
            "PROJECT_ID is not set: fill in the form field or set the "
            "GOOGLE_CLOUD_PROJECT environment variable."
        )

LOCATION = "us-central1"  # @param {type:"string"}
# REGION is an alias of LOCATION used by the networking and REST cells.
REGION = str(LOCATION)

In [None]:
# Look up the project number for PROJECT_ID with gcloud. The `!` capture yields
# the command output as a list of lines, so the first line is kept as the value.
# NOTE(review): if gcloud fails, the first line may be an error message rather
# than a number; confirm the displayed value before using the REST cells.
PROJECT_NUMBER = !(gcloud projects describe $PROJECT_ID --format="value(projectNumber)")
PROJECT_NUMBER = PROJECT_NUMBER[0]
PROJECT_NUMBER

In [None]:
# Cloud Storage bucket used for Vertex AI Pipelines.
BUCKET_NAME = "codelab-dev-jb0005-aiplatform"  # @param {type: "string"}
BUCKET_URI = "gs://" + BUCKET_NAME

# Repository, image name and tag of the container image the jobs run.
IMAGE_REPO = "pipelines-test-repo-psc"
IMAGE_NAME = "nonrfc-ip-call"
TAG = "latest"

IMAGE_URI = "us-docker.pkg.dev/{}/{}/{}:{}".format(PROJECT_ID, IMAGE_REPO, IMAGE_NAME, TAG)

In [None]:
# Private Service Connect interface (PSC-I) settings.
NETWORK_ATTACHMENT_NAME = "us-central1-vertex-psci"
NETWORK_ATTACHMENT_ID = "/".join(
    ["projects", PROJECT_ID, "regions", REGION, "networkAttachments", NETWORK_ATTACHMENT_NAME]
)

# DNS peering target: project, VPC network and DNS domain.
TARGET_PROJECT = str(PROJECT_ID)
TARGET_NETWORK = "consumer-vpc"  #@param {type:"string"}
DNS_DOMAIN = "demo.com."  #@param {type:"string"}

# NOTE: despite its name, CLASS_E_IP holds a DNS hostname, not an IP address.
CLASS_E_IP = "class-e-vm.demo.com"  #@param {type:"string"}
NON_RFC_URL = "http://" + CLASS_E_IP

# Host and port of the explicit proxy.
PROXY_VM_IP = "proxy-vm.demo.com"  #@param {type:"string"}
PROXY_VM_PORT = "8888"  #@param {type:"string"}

### Initialize Vertex AI SDK for Python

In [None]:
import vertexai
import json
from datetime import datetime

# Initialize the Vertex AI SDK with the configured project and location.
vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
)

## 1: Submit Vertex AI Training Job (Vertex AI SDK)

In [None]:
# The display name is the prefix plus a second-resolution timestamp.
JOB_ID_PREFIX = "test_psci-nonRFC"  #@param {type:"string"}
JOB_ID = f"{JOB_ID_PREFIX}_{datetime.now():%Y%m%d%H%M%S}"

In [None]:
from google.cloud import aiplatform

def create_custom_job_psci_sample(
    project: str,
    location: str,
    bucket: str,
    display_name: str,
    machine_type: str,
    replica_count: int,
    image_uri: str,
    network_attachment: str,
    domain: str,
    target_project: str,
    target_network: str,
):
    """Create and start a Vertex AI custom job that uses a PSC interface.

    Args:
        project: Project passed to `aiplatform.init`.
        location: Location passed to `aiplatform.init`.
        bucket: Staging bucket passed to `aiplatform.init`.
        display_name: Display name of the custom job.
        machine_type: Machine type of the single worker pool.
        replica_count: Replica count of the single worker pool.
        image_uri: Container image run by the worker pool.
        network_attachment: Network attachment for the PSC interface config.
        domain: DNS domain of the DNS peering config.
        target_project: Target project of the DNS peering config.
        target_network: Target network of the DNS peering config.

    Returns:
        The `aiplatform.CustomJob` object, after `run()` has been called on it
        with `sync=False`.
    """
    aiplatform.init(project=project, location=location, staging_bucket=bucket)

    # Container environment; the values are read from notebook-level variables
    # (NON_RFC_URL, PROXY_VM_IP, PROXY_VM_PORT), not from the arguments.
    container_env = [
        {"name": env_name, "value": env_value}
        for env_name, env_value in (
            ("NONRFC_URL", NON_RFC_URL),
            ("PROXY_VM_IP", PROXY_VM_IP),
            ("PROXY_VM_PORT", PROXY_VM_PORT),
        )
    ]
    container_spec = {
        "image_uri": image_uri,
        "command": [],
        "args": ["--sleep=600s"],
        "env": container_env,
    }
    pool_spec = {
        "machine_spec": {"machine_type": machine_type},
        "replica_count": replica_count,
        "container_spec": container_spec,
    }

    # PSC interface configuration with a single DNS peering entry.
    dns_peering = {
        "domain": domain,
        "target_project": target_project,
        "target_network": target_network,
    }
    psc_config = {
        "network_attachment": network_attachment,
        "dns_peering_configs": [dns_peering],
    }

    custom_job = aiplatform.CustomJob(
        display_name=display_name,
        worker_pool_specs=[pool_spec],
    )
    custom_job.run(psc_interface_config=psc_config, sync=False)
    return custom_job

In [None]:
# Submit the training job using the notebook-level configuration values.
job = create_custom_job_psci_sample(
    project=PROJECT_ID,
    location=LOCATION,
    bucket=BUCKET_URI,
    display_name=JOB_ID,
    machine_type="n2-standard-4",
    replica_count=1,
    image_uri=IMAGE_URI,
    network_attachment=NETWORK_ATTACHMENT_ID,
    domain=DNS_DOMAIN,
    target_project=TARGET_PROJECT,
    target_network=TARGET_NETWORK,
)

## 2: (Optional) Submit Vertex AI Training Job (REST API)

In [None]:
# Service name, host names and API version for the REST calls.
SERVICE_NAME = "aiplatform"
SERVICE = SERVICE_NAME + ".googleapis.com"
ENDPOINT = "-".join([REGION, SERVICE])
API_VERSION = "v1"

In [None]:
# Build a new timestamped display name for the REST submission.
JOB_ID = "_".join([JOB_ID_PREFIX, datetime.now().strftime("%Y%m%d%H%M%S")])

In [None]:
# Request body for creating the custom job through the REST API.
CUSTOM_JOB = {
    "display_name": JOB_ID,
    "job_spec": {
        "worker_pool_specs": [
            {
                "machine_spec": {
                    "machine_type": "n2-standard-4",
                },
                "replica_count": 1,
                "container_spec": {
                    "image_uri": IMAGE_URI,
                    "env": [
                        {"name": "NONRFC_URL", "value": NON_RFC_URL},
                        {"name": "PROXY_VM_IP", "value": PROXY_VM_IP},
                        {"name": "PROXY_VM_PORT", "value": PROXY_VM_PORT},
                    ],
                },
            },
        ],
        "enable_web_access": True,
        "psc_interface_config": {
            "network_attachment": NETWORK_ATTACHMENT_ID,
            "dns_peering_configs": [
                {
                    "domain": DNS_DOMAIN,
                    # Use TARGET_PROJECT, like the SDK and pipeline payloads do,
                    # so changing the DNS peering target affects every submission.
                    "target_project": TARGET_PROJECT,
                    "target_network": TARGET_NETWORK,
                },
            ],
        },
    },
}

print(json.dumps(CUSTOM_JOB, indent=2))

In [None]:
import requests
bearer_token = !gcloud auth application-default print-access-token
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer {}'.format(bearer_token[0]),
}

request_uri = f"https://{REGION}-aiplatform.googleapis.com/{API_VERSION}/projects/{PROJECT_NUMBER}/locations/{REGION}/customJobs/"

print("request_uri: ", request_uri)

In [None]:
# POST the job definition to the customJobs endpoint.
response_autopush = requests.post(request_uri, json=CUSTOM_JOB, headers=headers)
response = response_autopush
print("response:", response)
# Decide success from the HTTP status code (`response.ok`), not from the
# free-text reason phrase, which a server is not required to send as "OK".
if response.ok:
    job_name = response.json()["name"]
    # The job ID is the last segment of the returned resource name.
    job_id = job_name.split("/")[-1]
    print("Created Job: ", job_name)
else:
    print(response.text)

## 3: Submit Vertex AI Pipelines Job (Vertex AI SDK)

In [None]:
# pipeline parameters
# CACHE_PIPELINE is a boolean flag, so it is exposed as a boolean form field.
CACHE_PIPELINE = False  # @param {type: "boolean"}
_DEFAULT_IMAGE = IMAGE_URI
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/intro"
PIPELINE_DISPLAY_NAME = "pipeline-nonRFCIP"  # @param {type: "string"}
# Preview the lower-cased, timestamped name format used for pipeline job IDs.
# Single quotes inside the replacement field keep this f-string valid on
# Python versions before 3.12.
print(f"{PIPELINE_DISPLAY_NAME.lower()}-{datetime.now().strftime('%Y%m%d%H%M%S')}")

In [None]:
from re import S
import kfp
from kfp import dsl
from kfp.dsl import container_component, ContainerSpec
from kfp import compiler
from google.cloud import aiplatform

In [None]:
# ==== Component with env variable ====
# Container component that runs a bash script: it prints the local IPs, runs
# traceroute against `dns_domain`, then curls "http://<dns_domain>" through the
# proxy at proxy_vm_ip:proxy_vm_port and exits 1 if curl reports failure.
@container_component
def dns_peering_test_op(dns_domain: str, proxy_vm_ip:str, proxy_vm_port:str):
    # Named substitutions make each occurrence in the script self-describing.
    substitutions = {
        "dns_domain": dns_domain,
        "proxy_vm_ip": proxy_vm_ip,
        "proxy_vm_port": proxy_vm_port,
    }
    script = """
            # These are installed in the container
            # apt-get update && apt-get install inetutils-traceroute inetutils-ping netcat-openbsd curl -y

            echo "Local IP(s): $(hostname -I)"

            echo "Attempting to trace route to %(dns_domain)s"
            traceroute -w 1 -m 7 "%(dns_domain)s"

            echo "Sending curl requests to http://%(dns_domain)s via proxy %(proxy_vm_ip)s:%(proxy_vm_port)s and recording trace..."
            if curl -L -v --trace-ascii /dev/stdout -x http://%(proxy_vm_ip)s:%(proxy_vm_port)s "http://%(dns_domain)s"; then
                echo "Curl request succeeded!"
            else
                echo "Curl request failed!"
                exit 1
            fi
            """ % substitutions

    return ContainerSpec(
        image=_DEFAULT_IMAGE,
        command=["bash", "-c"],
        args=[script],
    )

# ==== Pipeline ====
# Single-step pipeline: forwards its three parameters to dns_peering_test_op
# and sets the step's caching option from CACHE_PIPELINE.
@dsl.pipeline(
    pipeline_root=PIPELINE_ROOT,
    name="dns-peering-test-pipeline",
    description="Test DNS Peering using env variable",
)
def dns_peering_test_pipeline(dns_domain: str, proxy_vm_ip: str, proxy_vm_port: str):
    probe_task = dns_peering_test_op(
        dns_domain=dns_domain,
        proxy_vm_ip=proxy_vm_ip,
        proxy_vm_port=proxy_vm_port,
    )
    probe_task.set_caching_options(enable_caching=CACHE_PIPELINE)

# ==== Compile pipeline ====
# Write the pipeline definition to a local YAML file that later cells load.
if __name__ == "__main__":
    aiplatform.init(project=PROJECT_ID, location=LOCATION)

    pipeline_compiler = compiler.Compiler()
    pipeline_compiler.compile(
        package_path="dns_peering_test_pipeline.yaml",
        pipeline_func=dns_peering_test_pipeline,
    )
    print("✅ Pipeline compiled to dns_peering_test_pipeline.yaml")

In [None]:
import yaml
# Load the compiled pipeline definition. A malformed file is left to raise
# here: swallowing the YAML error would leave `pipeline_spec` undefined and
# only move the failure to a later cell.
with open("dns_peering_test_pipeline.yaml", "r") as stream:
    pipeline_spec = yaml.safe_load(stream)
print(pipeline_spec)

In [None]:
# Import aiplatform and the appropriate API version v1
from google.cloud import aiplatform, aiplatform_v1

# Initialize the Vertex SDK using PROJECT_ID and LOCATION
aiplatform.init(project=PROJECT_ID, location=LOCATION)

# Client options carrying the API endpoint host for LOCATION
client_options = dict(api_endpoint=f"{LOCATION}-aiplatform.googleapis.com")

# Initialize the PipelineServiceClient with those options
client = aiplatform_v1.PipelineServiceClient(client_options=client_options)

# PSC interface settings: the network attachment plus one DNS peering entry.
PSCI_INTERFACE_CONFIG = dict(
    network_attachment=NETWORK_ATTACHMENT_ID,
    dns_peering_configs=[
        dict(
            domain=DNS_DOMAIN,
            target_project=TARGET_PROJECT,
            target_network=TARGET_NETWORK,
        ),
    ],
)

# Construct the request
request = aiplatform_v1.CreatePipelineJobRequest(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
    # Single quotes inside the replacement field keep this f-string valid on
    # Python versions before 3.12.
    pipeline_job_id=f"{PIPELINE_DISPLAY_NAME.lower()}-{datetime.now().strftime('%Y%m%d%H%M%S')}",
    pipeline_job=aiplatform_v1.PipelineJob(
        display_name=PIPELINE_DISPLAY_NAME,
        pipeline_spec=pipeline_spec,
        runtime_config=aiplatform_v1.PipelineJob.RuntimeConfig(
            gcs_output_directory=BUCKET_URI,
            parameter_values=dict(
                # The component prepends "http://" itself and also runs
                # traceroute on this value, so pass the bare hostname
                # (CLASS_E_IP), not the full URL.
                dns_domain=CLASS_E_IP,
                proxy_vm_ip=PROXY_VM_IP,
                proxy_vm_port=PROXY_VM_PORT,
            ),
        ),
        psc_interface_config=aiplatform_v1.PscInterfaceConfig(
            PSCI_INTERFACE_CONFIG
        ),
    ),
)

In [None]:
# Make the API call: submit the CreatePipelineJobRequest built in the previous
# cell through the PipelineServiceClient.
response = client.create_pipeline_job(request=request)

# Print the response returned by the create call
print(response)

## 4: (Optional) Submit Vertex AI Pipelines Job (REST API)

In [None]:
import yaml
# Load the compiled pipeline definition. A malformed file is left to raise
# here: swallowing the YAML error would leave `pipeline_spec` undefined and
# only move the failure to a later cell.
with open("dns_peering_test_pipeline.yaml", "r") as stream:
    pipeline_spec = yaml.safe_load(stream)
print(pipeline_spec)

In [None]:
# Request body for creating the pipeline job through the REST API.
PIPELINE_JOB = {
    "display_name": PIPELINE_DISPLAY_NAME,
    "pipeline_spec": pipeline_spec,
    "runtime_config": {
        "gcs_output_directory": BUCKET_URI,
        "parameterValues": {
            # The component prepends "http://" itself and also runs traceroute
            # on this value, so pass the bare hostname (CLASS_E_IP), not the
            # full URL.
            "dns_domain": CLASS_E_IP,
            "proxy_vm_ip": PROXY_VM_IP,
            "proxy_vm_port": PROXY_VM_PORT,
        },
    },
    "psc_interface_config": {
        "network_attachment": NETWORK_ATTACHMENT_ID,
        "dns_peering_configs": [
            {
                "domain": DNS_DOMAIN,
                "target_project": TARGET_PROJECT,
                "target_network": TARGET_NETWORK,
            },
        ],
    },
}

print(json.dumps(PIPELINE_JOB, indent=2))

In [None]:
import requests
bearer_token = !gcloud auth application-default print-access-token
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer {}'.format(bearer_token[0]),
}

request_uri = f"https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/pipelineJobs?pipelineJobId={PIPELINE_DISPLAY_NAME.lower()}-{datetime.now().strftime("%Y%m%d%H%M%S")}"

print("request_uri: ", request_uri)

In [None]:
# POST the pipeline job definition to the pipelineJobs endpoint.
response_autopush = requests.post(request_uri, json=PIPELINE_JOB, headers=headers)
response = response_autopush
print("response:", response)
# Decide success from the HTTP status code (`response.ok`), not from the
# free-text reason phrase, which a server is not required to send as "OK".
if response.ok:
    job_name = response.json()["name"]
    # The job ID is the last segment of the returned resource name.
    job_id = job_name.split("/")[-1]
    print("Created Pipeline: ", job_name)
else:
    print(response.text)

## 5. Cloud Logging Validation
The Vertex AI Pipelines job takes approximately 15 minutes to run the first time; subsequent runs are much shorter. To validate a successful outcome, perform the following steps:

Navigate to Vertex AI → Training → Custom jobs - [Cloud Console Link](https://console.cloud.google.com/vertex-ai/training/custom-jobs)

Select the executed custom job

![Figure 5](resources/images/figure-5-custom-job.png)

Select View Logs

![Figure 6](resources/images/figure-6-view-job.png)

Once Cloud Logging is available, select Run Query. The query returns the highlighted entries shown below, which confirm a successful wget from Vertex AI Pipelines to the class-e-vm.

![Figure 7](resources/images/figure-7-logs-explorer.png)

![Figure 8](resources/images/figure-8-job-logging.png)

## 6. Enable TCPDump
To validate IP connectivity from Vertex AI Pipelines, we can use tcpdump. This lets us observe traffic originating from the PSC network attachment subnet (192.168.10.0/28) when Vertex AI Pipelines sends the GET request to the VM class-e-vm.demo.com (240.0.0.0/4).

 From Cloud Shell ssh into the proxy vm.

```
gcloud compute ssh --zone us-central1-a "proxy-vm" --tunnel-through-iap --project $PROJECT_ID
```

 From the proxy-vm OS execute tcpdump filtering on the class-e-vm and PSC network attachment subnet.

```
sudo tcpdump -i any net 240.0.0.0/4 or 192.168.10.0/28 -nn
```

Open a new Cloud Shell tab, update your project variable and ssh into the class-e-vm

```
gcloud compute ssh --zone us-central1-a "class-e-vm" --tunnel-through-iap --project $PROJECT_ID
```

From the class-e-vm OS, execute tcpdump filtering on the proxy-vm subnet.

```
sudo tcpdump -i any net 10.10.10.0/28 -nn
```

### 6.1 TCPDump Validation
Let's review the TCPDUMP output that further validates the connectivity to compute instances:

From proxy-vm observe the HTTP GET and 200 OK

```console
03:05:34.778574 ens4  Out IP 10.10.10.2.40326 > 240.0.0.2.80: Flags [P.], seq 1:63, ack 1, win 511, options [nop,nop,TS val 1435446009 ecr 2475360885], length 62: HTTP: GET / HTTP/1.0
03:05:34.778946 ens4  In  IP 240.0.0.2.80 > 10.10.10.2.40326: Flags [.], ack 63, win 506, options [nop,nop,TS val 2475360889 ecr 1435446009], length 0
03:05:34.778974 ens4  Out IP 10.10.10.2.40326 > 240.0.0.2.80: Flags [P.], seq 63:185, ack 1, win 511, options [nop,nop,TS val 1435446010 ecr 2475360889], length 122: HTTP
03:05:34.781999 ens4  In  IP 240.0.0.2.80 > 10.10.10.2.40326: Flags [.], ack 185, win 506, options [nop,nop,TS val 2475360892 ecr 1435446010], length 0
03:05:34.906678 ens4  In  IP 240.0.0.2.80 > 10.10.10.2.40326: Flags [P.], seq 1:265, ack 185, win 506, options [nop,nop,TS val 2475361016 ecr 1435446010], length 264: HTTP: HTTP/1.1 200 OK
```

From class-e-vm observe the HTTP GET and 200 OK

```console
03:05:34.778768 ens4  In  IP 10.10.10.2.40326 > 240.0.0.2.80: Flags [P.], seq 1:63, ack 1, win 511, options [nop,nop,TS val 1435446009 ecr 2475360885], length 62: HTTP: GET / HTTP/1.0
03:05:34.778819 ens4  Out IP 240.0.0.2.80 > 10.10.10.2.40326: Flags [.], ack 63, win 506, options [nop,nop,TS val 2475360889 ecr 1435446009], length 0
03:05:34.781815 ens4  In  IP 10.10.10.2.40326 > 240.0.0.2.80: Flags [P.], seq 63:185, ack 1, win 511, options [nop,nop,TS val 1435446010 ecr 2475360889], length 122: HTTP
03:05:34.781856 ens4  Out IP 240.0.0.2.80 > 10.10.10.2.40326: Flags [.], ack 185, win 506, options [nop,nop,TS val 2475360892 ecr 1435446010], length 0
03:05:34.906503 ens4  Out IP 240.0.0.2.80 > 10.10.10.2.40326: Flags [P.], seq 1:265, ack 185, win 506, options [nop,nop,TS val 2475361016 ecr 1435446010], length 264: HTTP: HTTP/1.1 200 OK
```

## Cleaning up

Vertex AI Training and Vertex AI Pipelines jobs were created in this notebook. To clean up resources, follow the steps in the README.md in this directory.