In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set-up Rolling Deployment to replace a Deployed Model

This notebook demonstrates how to perform a rolling deployment on Vertex AI to replace an existing deployed model with zero downtime. Rolling deployments gradually transition traffic from the old model to the new model while maintaining service availability.

See: [Rolling Deployment documentation](https://cloud.google.com/vertex-ai/docs/predictions/rolling-deployment)

In [1]:
import json
import google.auth.transport.requests
import google.auth
from google.cloud import storage
import requests
from google.cloud import aiplatform

#### Configure Deployment Parameters

Set the project configuration and deployment parameters:
- `ENDPOINT_ID`: The endpoint where the model is deployed (from notebook 01)
- `MODEL_ID`: ID of the NEW model from the Vertex AI Model Registry that you want to deploy.
- `PREVIOUS_DEPLOYED_MODEL`: DeployedModel ID of a model on the endpoint to replace
- `MAX_UNAVAILABLE_REPLICAS`: Number of replicas that can be down during rollout (controls deployment risk)
- `MAX_SURGE_REPLICAS`: Number of extra replicas to create temporarily (controls deployment speed)

In [None]:
PROJECT_ID = "sandbox-401718"  # @param {type:"string"}
LOCATION_ID = "us-central1"  # @param {type:"string"}
endpoint_name = f"projects/757654702990/locations/us-central1/endpoints/2725107683605610496" # @param {type:"string"}

# Rolling Deployment paramaters 
ENDPOINT_ID = aiplatform.Endpoint(endpoint_name=endpoint_name).name
PREVIOUS_DEPLOYED_MODEL = "8570832776490647552" # @param {type:"string"} (see CLI command below to list)
MODEL_ID = "323587371566104576" # @param {type:"string"} 
MAX_UNAVAILABLE_REPLICAS = 1 # @param {type:"integer"}
MAX_SURGE_REPLICAS = 1 # @param {type:"integer"}

In [40]:
# # List the DeployedModel ID of a model on the same endpoint
# ! gcloud ai endpoints describe {ENDPOINT_ID}  \
#   --project={PROJECT_ID} \
#   --region={LOCATION_ID} \
#   --format="value(deployedModels.id)"

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
4095662121767927808;8570832776490647552


In [3]:
# Credentials

# Set up Application Default Credentials (ADC)
credentials, project_id = google.auth.default()
auth_req = google.auth.transport.requests.Request()
credentials.refresh(auth_req)
access_token = credentials.token

In [6]:
# url = f"https://discoveryengine.googleapis.com/v1alpha/projects/{PROJECT_ID}/locations/global/podcasts"
url = f"https://{LOCATION_ID}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION_ID}/endpoints/{ENDPOINT_ID}:deployModel"

headers = {
    'Authorization': 'Bearer ' + access_token,
    'Content-Type': 'application/json; charset=utf-8'
}

request_body = str({
  "deployedModel": {
    "model": f"projects/{PROJECT_ID}/locations/{LOCATION_ID}/models/{MODEL_ID}",
    "rolloutOptions": {
      "previousDeployedModel": f"{PREVIOUS_DEPLOYED_MODEL}",
      "maxUnavailableReplicas": f"{MAX_UNAVAILABLE_REPLICAS}",
      "maxSurgeReplicas": f"{MAX_SURGE_REPLICAS}"
    }
  }
})

#### Start Deployment vis REST
Initiates the rolling deployment via REST API and polls operation status until completion

In [7]:
import time

# Start timing
start_time = time.time()

# Make the initial deployModel request
response = requests.post(url, headers=headers, data=request_body)
response.raise_for_status()

operation = response.json()
operation_name = operation['name']
print(f"Started deployment operation: {operation_name}")

# Poll the operation until completion
while True:
    op_url = f"https://{LOCATION_ID}-aiplatform.googleapis.com/v1/{operation_name}"
    op_response = requests.get(op_url, headers=headers)
    op_response.raise_for_status()
    op_data = op_response.json()
    
    if op_data.get('done', False):
        end_time = time.time()
        elapsed_time = end_time - start_time
        
        # Check if operation succeeded or failed
        if 'error' in op_data:
            print(f"Deployment failed after {elapsed_time:.2f} seconds")
            print(f"Error: {op_data['error']}")
        else:
            print(f"Deployment completed successfully in {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")
            print(f"Result: {op_data.get('response', {})}")
        break
    
    # Progress
    elapsed = time.time() - start_time
    time.sleep(10) 

Started deployment operation: projects/757654702990/locations/us-central1/endpoints/2725107683605610496/operations/8774335697139007488
Deployment completed successfully in 282.50 seconds (4.71 minutes)
Result: {'@type': 'type.googleapis.com/google.cloud.aiplatform.v1beta1.DeployModelResponse', 'deployedModel': {'id': '5362018041989169152'}}
