In [11]:
from google.cloud import aiplatform
import time

In [12]:
# Deployment settings. Replace every <...> placeholder before running the notebook.
PROJECT_ID = "<project-id>"
LOCATION = "asia-northeast3"
# Numeric endpoint ID, e.g. 8768020291327623168 (the ID that appears in the captured outputs).
ENDPOINT_ID = "<endpoint-id>"
# Full model resource name; the model ID looks like google_gemma-2b-1718176207297.
MODEL_NAME = "projects/{}/locations/{}/models/<model-id>".format(PROJECT_ID, LOCATION)
DEPLOYED_MODEL_DISPLAY_NAME = "<display-name>"
# Regional service host derived from LOCATION (the service requires regional API endpoints).
API_ENDPOINT = LOCATION + "-aiplatform.googleapis.com"
# Timeout value handed to deploy_model_sample when waiting on the deploy operation.
TIMEOUT = 7200

In [13]:
def deploy_model_sample(
    project: str,
    endpoint_id: str,
    model_name: str,
    deployed_model_display_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 7200,
    machine_type: str = "a2-highgpu-1g",
    accelerator_type: str = "NVIDIA_TESLA_A100",
    accelerator_count: int = 1,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
):
    """Deploy a model to an existing endpoint and wait for the operation to finish.

    Args:
        project: Project ID or number that owns the endpoint.
        endpoint_id: ID of the endpoint to deploy to.
        model_name: Full model resource name, in the form
            'projects/{project}/locations/{location}/models/{model}'.
        deployed_model_display_name: Display name given to the deployed model.
        location: Region of the endpoint; used to build the endpoint path.
        api_endpoint: Regional API host. It is NOT derived from `location`,
            so the caller must pass a host for the same region.
        timeout: Passed to `response.result()` while waiting for the
            long-running operation.
        machine_type: Machine type for the dedicated resources.
        accelerator_type: Accelerator attached to each replica. Pass an empty
            value (or `accelerator_count=0`) to omit the accelerator fields.
        accelerator_count: Number of accelerators per replica.
        min_replica_count: Minimum number of replicas.
        max_replica_count: Maximum number of replicas.

    Returns:
        The result of the deploy operation (the object printed as
        `deploy_model_response`), so callers can read the deployed model ID.

    Raises:
        Whatever the client raises for a failed request, or if the operation
        does not complete within `timeout`; nothing is caught here.
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.EndpointServiceClient(client_options=client_options)

    machine_spec = {"machine_type": machine_type}
    # Only attach accelerator fields when an accelerator is actually requested,
    # so CPU-only machine types can be deployed with the same function.
    if accelerator_type and accelerator_count:
        machine_spec["accelerator_type"] = accelerator_type
        machine_spec["accelerator_count"] = accelerator_count

    deployed_model = {
        # format: 'projects/{project}/locations/{location}/models/{model}'
        "model": model_name,
        "display_name": deployed_model_display_name,
        # AutoML Vision models require `automatic_resources` field
        # Other model types may require `dedicated_resources` field instead
        "dedicated_resources": {
            "min_replica_count": min_replica_count,
            "max_replica_count": max_replica_count,
            "machine_spec": machine_spec,
        },
    }
    # key '0' assigns traffic for the newly deployed model
    # Traffic percentage values must add up to 100
    # Leave dictionary empty if endpoint should not accept any traffic
    traffic_split = {"0": 100}
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.deploy_model(
        endpoint=endpoint, deployed_model=deployed_model, traffic_split=traffic_split
    )
    print("Long running operation:", response.operation.name)
    # Blocks until the deployment finishes or `timeout` elapses.
    deploy_model_response = response.result(timeout=timeout)
    print("deploy_model_response:", deploy_model_response)
    return deploy_model_response

In [14]:
try:
    # Wall-clock timestamp taken immediately before the deploy call.
    start_time = time.time()
    print(start_time)

    deploy_model_sample(
        project=PROJECT_ID,
        endpoint_id=ENDPOINT_ID,
        model_name=MODEL_NAME,
        deployed_model_display_name=DEPLOYED_MODEL_DISPLAY_NAME,
        location=LOCATION,
        api_endpoint=API_ENDPOINT,
        timeout=TIMEOUT,
    )

    end_time = time.time()
    print(end_time)
    # Use the recorded end_time so the reported duration matches the two
    # timestamps printed above instead of sampling the clock a third time.
    print("--- %s seconds ---" % (end_time - start_time))

except Exception as ex:
    # Best-effort cell: report the failure without a traceback so the
    # notebook keeps running; "Success" is only printed when nothing raised.
    print(f"Error: {ex}")
else:
    print("Success")


1718258428.2620625
Long running operation: projects/579186738071/locations/asia-northeast3/endpoints/8768020291327623168/operations/2913069196373917696
deploy_model_response: deployed_model {
  id: "9159446430815879168"
}

1718259787.9201727
--- 1359.6588623523712 seconds ---
Success


In [8]:
# Shell escape: runs `gcloud ai endpoints describe` for the endpoint, project and
# region named by ENDPOINT_ID, PROJECT_ID and LOCATION, to inspect the endpoint's
# deployedModels and trafficSplit after deployment.
# NOTE(review): relies on IPython expanding $NAME from the notebook's Python
# variables, so the config cell must have been run first — confirm on a fresh kernel.
!gcloud ai endpoints describe $ENDPOINT_ID --project=$PROJECT_ID --region=$LOCATION

Using endpoint [https://asia-northeast3-aiplatform.googleapis.com/]
createTime: '2024-06-12T07:11:00.723525Z'
deployedModels:
- createTime: '2024-06-13T03:02:26.955618Z'
  dedicatedResources:
    machineSpec:
      acceleratorCount: 1
      acceleratorType: NVIDIA_TESLA_A100
      machineType: a2-highgpu-1g
    maxReplicaCount: 1
    minReplicaCount: 1
  displayName: google_gemma-2b-kevin
  id: '3954974111435849728'
  model: projects/579186738071/locations/asia-northeast3/models/google_gemma-2b-1718176207297
  modelVersionId: '1'
displayName: google_gemma-2b-mg-one-click-deploy
etag: AMEw9yOW23k9pbD47XJfPa1bDXvXONIowddbrhsv1iPrHJV3KR1tlYKgwAlqnEDe9i26
labels:
  mg-one-click-deploy: publishers-google-models-335
  versioned-mg-one-click-deploy: publishers-google-models-335-005
name: projects/579186738071/locations/asia-northeast3/endpoints/8768020291327623168
trafficSplit:
  '3954974111435849728': 100
updateTime: '2024-06-13T03:25:07.271479Z'
