In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Performance Benchamrking Vertex Endpoint

This notebook demonstrates a method to simulate users accessing the Vertex AI private endpoint using a load testing tool called Locust. Locust measures metrics including requests per second, response times, and failure rates under user-specified load conditions. 

- `PROJECT_ID`: Google Cloud project ID where Vertex AI resources are deployed
- `LOCATION`: Google Cloud region where the Vertex AI endpoint is located
- `INTERNAL_IP_ADDRESS`: Internal IP address used to access the Vertex AI private service connect endpoint
- `VERTEX_AI_ENDPOINT_NAME`: Name of the Vertex AI private service connect endpoint
- `ENDPOINT_ID`: Identifier for the Vertex AI endpoint



In [None]:
# ! pip3 install --upgrade --user --quiet google-cloud-aiplatform

In [None]:
! pip3 install locust

In [39]:
import json
import google.auth.transport.requests
import google.auth
from google.cloud import storage
import requests

import logging
logging.disable(logging.WARNING)

import warnings
warnings.filterwarnings('ignore')

In [41]:
PROJECT_ID = "sandbox-401718"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
INTERNAL_IP_ADDRESS = ""  # @param {type:"string"}
VERTEX_AI_ENDPOINT_NAME = "psc-endpoint"  # @param {type:"string"}
ENDPOINT_ID = "8753138401445675008"  # @param {type:"string"}


### Test Endpoint

Test inferencing of the served model on the PSC private endpoint from `01-onnx_model_training_and_endpoint.ipynb` using REST requests

In [42]:
# Credentials

# Set up Application Default Credentials (ADC)

credentials, project_id = google.auth.default()
auth_req = google.auth.transport.requests.Request()
credentials.refresh(auth_req)
access_token = credentials.token
# url = f"https://us-central1-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/us-central1/pipelineJobs"

url = f"https://{INTERNAL_IP_ADDRESS}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:predict"


In [11]:
# # JSON FILE
# with open("example_payload.json") as json_file:
#     data = json.load(json_file)
#     url = f"https://{INTERNAL_IP_ADDRESS}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:predict"
#     headers = {
#       "Content-Type": "application/json",
#       "Authorization": f"Bearer {access_token}"  # Add access token to headers
#     }
#     payload = {
#       "instances": data["instances"],
#     }

# response = requests.post(url, headers=headers, json=payload, verify=False)
# print(response.json())



{'deployedModelId': '8448572581040095232', 'model': 'projects/757654702990/locations/us-west2/models/2875552760122572800', 'modelDisplayName': 'vertex-custom-serve', 'modelVersionId': '1', 'predictions': [[-7.792285919189453, -2.458911180496216, -6.757010459899902, 7.791323661804199, -25.71830368041992, 13.0219030380249, -12.47751426696777, -9.414200782775879, -12.70892524719238, -3.479300022125244]]}


In [43]:
data = {"instances": [[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0117647061124444, 0.07058823853731155, 0.07058823853731155, 0.07058823853731155, 0.4941176474094391, 0.5333333611488342, 0.686274528503418, 0.10196078568696976, 0.6509804129600525, 1.0, 0.9686274528503418, 0.49803921580314636, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11764705926179886, 0.1411764770746231, 0.3686274588108063, 0.6039215922355652, 0.6666666865348816, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.8823529481887817, 0.6745098233222961, 0.9921568632125854, 0.9490196108818054, 0.7647058963775635, 0.250980406999588, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1921568661928177, 0.9333333373069763, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9843137264251709, 0.364705890417099, 0.32156863808631897, 0.32156863808631897, 0.21960784494876862, 0.15294118225574493, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07058823853731155, 0.8588235378265381, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.7764706015586853, 0.7137255072593689, 0.9686274528503418, 0.9450980424880981, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3137255012989044, 0.6117647290229797, 0.41960784792900085, 0.9921568632125854, 0.9921568632125854, 0.8039215803146362, 0.04313725605607033, 0.0, 0.16862745583057404, 0.6039215922355652, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.054901961237192154, 0.003921568859368563, 0.6039215922355652, 0.9921568632125854, 0.3529411852359772, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.545098066329956, 0.9921568632125854, 0.7450980544090271, 0.007843137718737125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04313725605607033, 0.7450980544090271, 0.9921568632125854, 0.27450981736183167, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13725490868091583, 0.9450980424880981, 0.8823529481887817, 0.6274510025978088, 0.42352941632270813, 0.003921568859368563, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3176470696926117, 0.9411764740943909, 0.9921568632125854, 0.9921568632125854, 0.46666666865348816, 0.09803921729326248, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1764705926179886, 0.729411780834198, 0.9921568632125854, 0.9921568632125854, 0.5882353186607361, 0.10588235408067703, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062745101749897, 0.364705890417099, 0.9882352948188782, 0.9921568632125854, 0.7333333492279053, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9764705896377563, 0.9921568632125854, 0.9764705896377563, 0.250980406999588, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18039216101169586, 0.5098039507865906, 0.7176470756530762, 0.9921568632125854, 0.9921568632125854, 0.8117647171020508, 0.007843137718737125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15294118225574493, 0.5803921818733215, 0.8980392217636108, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9803921580314636, 0.7137255072593689, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0941176488995552, 0.4470588266849518, 0.8666666746139526, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.7882353067398071, 0.30588236451148987, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09019608050584793, 0.25882354378700256, 0.8352941274642944, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.7764706015586853, 0.3176470696926117, 0.007843137718737125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07058823853731155, 0.6705882549285889, 0.8588235378265381, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.7647058963775635, 0.3137255012989044, 0.03529411926865578, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.21568627655506134, 0.6745098233222961, 0.886274516582489, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.95686274766922, 0.5215686559677124, 0.04313725605607033, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.5333333611488342, 0.9921568632125854, 0.9921568632125854, 0.9921568632125854, 0.8313725590705872, 0.529411792755127, 0.5176470875740051, 0.062745101749897, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]]]}
url = f"https://{INTERNAL_IP_ADDRESS}/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:predict"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {access_token}",  # Add access token to headers
}
payload = {
    "instances": data["instances"],
}

response = requests.post(url, headers=headers, json=payload, verify=False)

print(response.json())


{'deployedModelId': '2920721797303238656', 'model': 'projects/757654702990/locations/us-central1/models/4952161888596131840', 'modelDisplayName': 'vertex-custom-serve', 'modelVersionId': '1', 'predictions': [[-8.667357444763184, -6.109257221221924, -4.605557441711426, 5.294328689575195, -25.1528377532959, 12.09958076477051, -17.54855728149414, -6.331753253936768, -12.77934455871582, -3.730733156204224]]}


### Run Locust Session & Stress Test

Run a Locust session, execute stress tests, and generate visual charts & statistics to assess the health and scalability of the endpoint.

Examples outputs and commands below:

![stress-stats.png](./imgs/stress-test-stats.png)
<br><br>
![stress-test.png](./imgs/stress-test.png)


In [52]:
payload_file = "example_payload.json"  # @param {type:"string"}

locustfile_content = f"""
from locust import HttpUser, task, between
import json
import subprocess

import json
import google.auth.transport.requests
import google.auth
from google.cloud import storage
import requests

import warnings
warnings.filterwarnings('ignore')

class VertexAIPredict(HttpUser):
    wait_time = between(1, 3)

    host = "https://{INTERNAL_IP_ADDRESS}"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.project_id = "{PROJECT_ID}"
        self.location = "{LOCATION}"
        self.endpoint_id = "{ENDPOINT_ID}"
        self.access_token = self.get_token()

    def get_token(self):
        try:
            token = subprocess.check_output(['gcloud', 'auth', 'print-access-token']).decode('utf-8').strip()
            return token
        except subprocess.CalledProcessError:
            print("Error getting access token. Make sure gcloud is configured.")
            return None


    @task
    def predict_endpoint(self):

        credentials, project_id = google.auth.default()
        auth_req = google.auth.transport.requests.Request()
        credentials.refresh(auth_req)
        access_token = credentials.token

        with open("{payload_file}", "r") as json_file:
            data = json.load(json_file)
            path = f"/v1/projects/{{self.project_id}}/locations/{{self.location}}/endpoints/{{self.endpoint_id}}:predict"

            headers = {{
                "Content-Type": "application/json",
                "Authorization": f"Bearer {{access_token}}"
            }}
            payload = {{
                "instances": data["instances"],
            }}


        with self.client.post(
            path,
            headers=headers,
            json=payload,
            verify=False,
            catch_response=True,
            name="/predict"
        ) as response:
            if response.status_code != 200:
                response.failure(f"Failed: {{response.status_code}}. Response Content: {{response.text}}") # Include response content
"""


with open("locust_test.py", "w") as f:
    f.write(locustfile_content)


In [None]:
! locust -f locust_test.py

In [45]:
! locust -f locust_test.py --headless -u 1000 -r 100 --run-time 10m --only-summary --loglevel CRITICAL

Type     Name  # reqs      # fails |    Avg     Min     Max    Med |   req/s  failures/s
--------||-------|-------------|-------|-------|-------|-------|--------|-----------
POST     /predict   45456     0(0.00%) |     91       5    7165    100 |   75.77        0.00
--------||-------|-------------|-------|-------|-------|-------|--------|-----------
         Aggregated   45456     0(0.00%) |     91       5    7165    100 |   75.77        0.00

Response time percentiles (approximated)
Type     Name      50%    66%    75%    80%    90%    95%    98%    99%  99.9% 99.99%   100% # reqs
--------||--------|------|------|------|------|------|------|------|------|------|------|------
POST     /predict      100    110    120    130    150    170    190    220    370   3400   7200  45456
--------||--------|------|------|------|------|------|------|------|------|------|------|------
         Aggregated      100    110    120    130    150    170    190    220    370   3400   7200  45456

