## Vertex AI: Local Model Custom Container Deployment

In [None]:
# Notebook-wide imports. They are placed in the first cell so that running the
# notebook top to bottom works: this cell and the deployment helper below
# already need `aiplatform`, `Optional` and `Dict`.
from typing import Dict, Optional

from google.cloud import aiplatform

# Initialize the Vertex AI client with the desired project and region
aiplatform.init(project="<Project-Id>", location="<Region>")

# Create the endpoint. No dedicated_endpoint_enabled flag is passed here, so
# this is the "standard" variant; the dedicated variant of this notebook passes
# dedicated_endpoint_enabled=True to request a dedicated endpoint.
endpoint = aiplatform.Endpoint.create(
    display_name="service_request_classification_standard",
)

print(f"Endpoint created: {endpoint.resource_name}")

In [None]:
# List the endpoints in the region whose display name matches the standard
# endpoint, e.g. to look up its numeric endpoint ID.
# <Region> is a placeholder: replace it before running, since an unquoted
# "<" / ">" is treated by the shell as a redirection.
!gcloud ai endpoints list \
  --region=<Region> \
  --filter=display_name='service_request_classification_standard'

In [None]:
# Re-attach to an endpoint that already exists, addressed by its full
# resource name: projects/<project>/locations/<region>/endpoints/<id>.
project_id = '<GCP Project Name>'
region = '<Region>'
endpoint_id = "<Endpoint-Id>"  # Replace with your endpoint ID
endpoint = aiplatform.Endpoint(
    endpoint_name=f"projects/{project_id}/locations/{region}/endpoints/{endpoint_id}"
)

In [None]:
def deploy_model_with_dedicated_resources_sample(
    project,
    location,
    model_name: str,
    machine_type: str,
    endpoint: Optional[aiplatform.Endpoint] = None,
    deployed_model_display_name: Optional[str] = None,
    traffic_percentage: Optional[int] = 0,
    traffic_split: Optional[Dict[str, int]] = None,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    sync: bool = True,
):
    """Deploy a registered Vertex AI model onto a chosen machine type.

    Args:
        project: Project handed to ``aiplatform.init``.
        location: Region handed to ``aiplatform.init``.
        model_name: A fully-qualified model resource name or model ID.
            Example: "projects/123/locations/us-central1/models/456" or
            "456" when project and location are initialized or passed.
        machine_type: Machine type forwarded to ``Model.deploy``.
        endpoint: Endpoint forwarded to ``Model.deploy`` (``None`` by default).
        deployed_model_display_name: Forwarded unchanged to ``Model.deploy``.
        traffic_percentage: Forwarded unchanged to ``Model.deploy``.
        traffic_split: Forwarded unchanged to ``Model.deploy``.
        min_replica_count: Forwarded unchanged to ``Model.deploy``.
        max_replica_count: Forwarded unchanged to ``Model.deploy``.
        accelerator_type: Forwarded unchanged to ``Model.deploy``.
        accelerator_count: Forwarded unchanged to ``Model.deploy``.
        sync: Forwarded unchanged to ``Model.deploy``.

    Returns:
        The ``aiplatform.Model`` object that ``deploy`` was called on.
    """
    aiplatform.init(project=project, location=location)

    # explanation_metadata, explanation_parameters and metadata are
    # deliberately not forwarded. The explanation arguments should only be
    # provided for a custom trained model and not an AutoML model.
    deploy_options = dict(
        endpoint=endpoint,
        deployed_model_display_name=deployed_model_display_name,
        traffic_percentage=traffic_percentage,
        traffic_split=traffic_split,
        machine_type=machine_type,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        sync=sync,
    )

    registered_model = aiplatform.Model(model_name=model_name)
    registered_model.deploy(**deploy_options)

    # NOTE(review): presumably blocks until a deployment started with
    # sync=False has finished -- confirm against the SDK.
    registered_model.wait()

    print(registered_model.display_name)
    print(registered_model.resource_name)
    return registered_model


In [None]:
# Deploy the model onto the endpoint loaded earlier in this notebook. Every
# option not listed keeps the helper's default (one replica, no accelerator,
# sync=True).
output = deploy_model_with_dedicated_resources_sample(
    project='<GCP Project Name>',
    location='<Region>',
    model_name='<Model-Name>',
    machine_type='n2-standard-80',
    endpoint=endpoint,
)

## Hugging Face Model

In [None]:
from google.cloud import aiplatform
from typing import Optional
from typing import Dict

In [None]:
def create_endpoint_sample(
    project: str,
    display_name: str,
    location: str,
):
    """Create a Vertex AI endpoint, print its names and return it.

    Args:
        project: Project used for ``aiplatform.init`` and for the create call.
        display_name: Display name given to the new endpoint.
        location: Region used for ``aiplatform.init`` and for the create call.

    Returns:
        The object returned by ``aiplatform.Endpoint.create``.
    """
    aiplatform.init(project=project, location=location)

    creation_args = {
        "display_name": display_name,
        "project": project,
        "location": location,
    }
    new_endpoint = aiplatform.Endpoint.create(**creation_args)

    print(new_endpoint.display_name)
    print(new_endpoint.resource_name)
    return new_endpoint


In [None]:
# Dedicated variant: dedicated_endpoint_enabled=True requests a dedicated
# endpoint, which the curl cells further down address through a
# *.prediction.vertexai.goog host name.
# NOTE(review): no aiplatform.init call in this cell -- project and region
# presumably come from an earlier init or the environment; confirm.
endpoint = aiplatform.Endpoint.create(
    display_name="service_request_classification",
    dedicated_endpoint_enabled=True,
)

In [None]:
# Re-attach to an endpoint that already exists, addressed by its full
# resource name: projects/<project>/locations/<region>/endpoints/<id>.
project_id = '<GCP Project Name>'
region = '<Region>'
endpoint_id = "<Endpoint-Id>"  # Replace with your endpoint ID
endpoint = aiplatform.Endpoint(
    endpoint_name=f"projects/{project_id}/locations/{region}/endpoints/{endpoint_id}"
)

In [None]:
# List the endpoints in the region whose display name matches the dedicated
# endpoint, e.g. to look up its numeric endpoint ID.
# <Region> is a placeholder: replace it before running, since an unquoted
# "<" / ">" is treated by the shell as a redirection.
!gcloud ai endpoints list \
  --region=<Region> \
  --filter=display_name='service_request_classification'

In [None]:
def deploy_model_with_dedicated_resources_sample(
    project,
    location,
    model_name: str,
    machine_type: str,
    endpoint: Optional[aiplatform.Endpoint] = None,
    deployed_model_display_name: Optional[str] = None,
    traffic_percentage: Optional[int] = 0,
    traffic_split: Optional[Dict[str, int]] = None,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    sync: bool = True,
):
    """Deploy a registered Vertex AI model onto a chosen machine type.

    Args:
        project: Project handed to ``aiplatform.init``.
        location: Region handed to ``aiplatform.init``.
        model_name: A fully-qualified model resource name or model ID.
            Example: "projects/123/locations/us-central1/models/456" or
            "456" when project and location are initialized or passed.
        machine_type: Machine type forwarded to ``Model.deploy``.
        endpoint: Endpoint forwarded to ``Model.deploy`` (``None`` by default).
        deployed_model_display_name: Forwarded unchanged to ``Model.deploy``.
        traffic_percentage: Forwarded unchanged to ``Model.deploy``.
        traffic_split: Forwarded unchanged to ``Model.deploy``.
        min_replica_count: Forwarded unchanged to ``Model.deploy``.
        max_replica_count: Forwarded unchanged to ``Model.deploy``.
        accelerator_type: Forwarded unchanged to ``Model.deploy``.
        accelerator_count: Forwarded unchanged to ``Model.deploy``.
        sync: Forwarded unchanged to ``Model.deploy``.

    Returns:
        The ``aiplatform.Model`` object that ``deploy`` was called on.
    """
    aiplatform.init(project=project, location=location)

    # explanation_metadata, explanation_parameters and metadata are
    # deliberately not forwarded. The explanation arguments should only be
    # provided for a custom trained model and not an AutoML model.
    deploy_options = dict(
        endpoint=endpoint,
        deployed_model_display_name=deployed_model_display_name,
        traffic_percentage=traffic_percentage,
        traffic_split=traffic_split,
        machine_type=machine_type,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        sync=sync,
    )

    registered_model = aiplatform.Model(model_name=model_name)
    registered_model.deploy(**deploy_options)

    # NOTE(review): presumably blocks until a deployment started with
    # sync=False has finished -- confirm against the SDK.
    registered_model.wait()

    print(registered_model.display_name)
    print(registered_model.resource_name)
    return registered_model


In [None]:
# Deploy the model onto the endpoint loaded earlier in this notebook. Every
# option not listed keeps the helper's default (one replica, no accelerator,
# sync=True).
output = deploy_model_with_dedicated_resources_sample(
    project='<GCP Project Name>',
    location='<Region>',
    model_name='<Model-Name>',
    machine_type='n2-standard-80',
    endpoint=endpoint,
)

In [None]:
# Set up Application Default Credentials; the requests-based prediction cell
# later in this notebook obtains its token via google.auth.default().
# NOTE(review): this command is interactive -- confirm it can complete inside
# the notebook environment being used.
!gcloud auth application-default login


In [None]:
# Online prediction against the dedicated endpoint host, with instances sent
# as {"text": ...} objects.
# The request carries a gcloud access token and targets the full
# /v1/projects/.../endpoints/...:predict resource path. The URL is quoted so
# the "<" / ">" of the placeholders are not parsed as shell redirections.
!curl -X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
-d '{"instances": [{"text": "I love this!"}, {"text": "I hate this!"}]}' \
"https://<Endpoint-Id>.<Region>-<Project-Id>.prediction.vertexai.goog/v1/projects/<Project-Id>/locations/<Region>/endpoints/<Endpoint-Id>:predict"


In [None]:
# Online prediction against the dedicated endpoint host, with instances sent
# as plain strings.
# The request carries a gcloud access token and targets the full
# /v1/projects/.../endpoints/...:predict resource path. The URL is quoted so
# the "<" / ">" of the placeholders are not parsed as shell redirections.
!curl -X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
-d '{"instances": ["I love this!", "I hate this!"]}' \
"https://<Endpoint-Id>.<Region>-<Project-Id>.prediction.vertexai.goog/v1/projects/<Project-Id>/locations/<Region>/endpoints/<Endpoint-Id>:predict"


In [None]:
# Same prediction request, sent through the regional aiplatform.googleapis.com
# API host instead of the dedicated host, authenticated with a gcloud access
# token. No -X is given; curl issues a POST because -d supplies a body.
! curl \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
-d '{"instances": ["I love this!", "I hate this!"]}' \
https://<Region>-aiplatform.googleapis.com/v1/projects/<Project-Id>/locations/<Region>/endpoints/<Endpoint-Id>:predict

In [None]:
# Authenticated prediction against the dedicated endpoint host, with the
# request body read from a file.
# PROJECT_ID, ENDPOINT_ID and INPUT_DATA_FILE must be set in the environment
# (or replaced inline) before running; the <...> parts are placeholders.
!curl \
-X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
"https://<Endpoint-Id>.<Region>-<Project-Id>.prediction.vertexai.goog/v1/projects/${PROJECT_ID}/locations/<Region>/endpoints/${ENDPOINT_ID}:predict" \
-d "@${INPUT_DATA_FILE}"

In [None]:
# List every endpoint of the project in the given region (no display-name
# filter). Replace the <Region> placeholder before running.
!gcloud ai endpoints list --project='<GCP Project Name>' --region=<Region>

In [None]:
# Show the full description of a single endpoint, selected by its ID.
# Replace the <Endpoint-Id> and <Region> placeholders before running.
!gcloud ai endpoints describe <Endpoint-Id> --project='<GCP Project Name>' --region=<Region>

In [None]:
import json
import requests
from google.auth import default
from google.auth.transport.requests import Request

# Replace with your actual values
project_id = "<Project-Id>"
endpoint_id = "<Endpoint-Id>"
location = "<Region>"

# A dedicated endpoint is served from its own host name but still uses the
# regular resource path ending in ":predict". The URL is built from the values
# above so there is a single place to edit.
# NOTE(review): the host part is documented with the project *number*; confirm
# the exact host with `gcloud ai endpoints describe`.
dedicated_host = f"{endpoint_id}.{location}-{project_id}.prediction.vertexai.goog"
url = (
    f"https://{dedicated_host}/v1/projects/{project_id}"
    f"/locations/{location}/endpoints/{endpoint_id}:predict"
)

# Define the instances and parameters for your model input
instances = [{"text": "I want to take a loan"}]
parameters = {
    "temperature": 0.7,
    "maxOutputTokens": 5000,
    "topP": 0.8,
    "topK": 40,
}

# Prepare the payload
payload = {
    "instances": instances
    # Uncomment the line below if your model supports parameters
    # "parameters": parameters,
}

# Get the authentication token from Application Default Credentials. The
# cloud-platform scope is requested explicitly so credentials that need a
# scope (e.g. service accounts) can be refreshed.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(Request())
token = credentials.token

# Set up the request headers
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
}

# Send the POST request; the timeout keeps the cell from hanging forever if
# the endpoint never answers.
response = requests.post(url, headers=headers, json=payload, timeout=60)

# Check the response
if response.status_code == 200:
    print("Response:", response.json())
else:
    print(f"Error: {response.status_code}, {response.text}")


In [None]:
# Plain GET against the root of the dedicated endpoint host; no Authorization
# header is sent.
# NOTE(review): presumably a reachability check for the host -- confirm intent.
!curl -X GET \
https://<Endpoint-Id>.<Region>-<Project-Id>.prediction.vertexai.goog/


In [None]:
# List the Compute Engine machine types offered in zone "<Region>-a", e.g. to
# check that the machine type used for deployment is available there.
!gcloud compute machine-types list --zones='<Region>-a'



In [None]:
# Point the Vertex AI SDK at the target project and region.
aiplatform.init(project="<Project-Id>", location="<Region>")

# Create the endpoint. No dedicated_endpoint_enabled flag is passed, so this
# is the "standard" variant; the dedicated variant of this notebook passes
# dedicated_endpoint_enabled=True instead.
endpoint = aiplatform.Endpoint.create(
    display_name="service_request_classification_standard",
)

print(f"Endpoint created: {endpoint.resource_name}")

In [None]:
# List the endpoints in the region whose display name matches the standard
# endpoint, e.g. to look up its numeric endpoint ID.
# <Region> is a placeholder: replace it before running, since an unquoted
# "<" / ">" is treated by the shell as a redirection.
!gcloud ai endpoints list \
  --region=<Region> \
  --filter=display_name='service_request_classification_standard'

In [None]:
# Re-attach to an endpoint that already exists, addressed by its full
# resource name: projects/<project>/locations/<region>/endpoints/<id>.
project_id = '<GCP Project Name>'
region = '<Region>'
endpoint_id = "<Endpoint-Id>"  # Replace with your endpoint ID
endpoint = aiplatform.Endpoint(
    endpoint_name=f"projects/{project_id}/locations/{region}/endpoints/{endpoint_id}"
)

In [None]:
def deploy_model_with_dedicated_resources_sample(
    project,
    location,
    model_name: str,
    machine_type: str,
    endpoint: Optional[aiplatform.Endpoint] = None,
    deployed_model_display_name: Optional[str] = None,
    traffic_percentage: Optional[int] = 0,
    traffic_split: Optional[Dict[str, int]] = None,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    sync: bool = True,
):
    """Deploy a registered Vertex AI model onto a chosen machine type.

    Args:
        project: Project handed to ``aiplatform.init``.
        location: Region handed to ``aiplatform.init``.
        model_name: A fully-qualified model resource name or model ID.
            Example: "projects/123/locations/us-central1/models/456" or
            "456" when project and location are initialized or passed.
        machine_type: Machine type forwarded to ``Model.deploy``.
        endpoint: Endpoint forwarded to ``Model.deploy`` (``None`` by default).
        deployed_model_display_name: Forwarded unchanged to ``Model.deploy``.
        traffic_percentage: Forwarded unchanged to ``Model.deploy``.
        traffic_split: Forwarded unchanged to ``Model.deploy``.
        min_replica_count: Forwarded unchanged to ``Model.deploy``.
        max_replica_count: Forwarded unchanged to ``Model.deploy``.
        accelerator_type: Forwarded unchanged to ``Model.deploy``.
        accelerator_count: Forwarded unchanged to ``Model.deploy``.
        sync: Forwarded unchanged to ``Model.deploy``.

    Returns:
        The ``aiplatform.Model`` object that ``deploy`` was called on.
    """
    aiplatform.init(project=project, location=location)

    # explanation_metadata, explanation_parameters and metadata are
    # deliberately not forwarded. The explanation arguments should only be
    # provided for a custom trained model and not an AutoML model.
    deploy_options = dict(
        endpoint=endpoint,
        deployed_model_display_name=deployed_model_display_name,
        traffic_percentage=traffic_percentage,
        traffic_split=traffic_split,
        machine_type=machine_type,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        sync=sync,
    )

    registered_model = aiplatform.Model(model_name=model_name)
    registered_model.deploy(**deploy_options)

    # NOTE(review): presumably blocks until a deployment started with
    # sync=False has finished -- confirm against the SDK.
    registered_model.wait()

    print(registered_model.display_name)
    print(registered_model.resource_name)
    return registered_model


In [None]:
# Deploy the model onto the endpoint loaded earlier in this notebook. Every
# option not listed keeps the helper's default (one replica, no accelerator,
# sync=True).
output = deploy_model_with_dedicated_resources_sample(
    project='<GCP Project Name>',
    location='<Region>',
    model_name='<Model-Name>',
    machine_type='n2-standard-80',
    endpoint=endpoint,
)

---

In [None]:
# Point the Vertex AI SDK at the target project and region.
aiplatform.init(project="<Project-Id>", location="us-central1")

# Create the endpoint. dedicated_endpoint_enabled=True requests a dedicated
# endpoint, which the curl cells in this section address through a
# *.prediction.vertexai.goog host name.
endpoint = aiplatform.Endpoint.create(
    display_name="service_request_classification",
    dedicated_endpoint_enabled=True,
)

print(f"Endpoint created: {endpoint.resource_name}")

In [None]:
# List the endpoints in us-central1 whose display name matches the dedicated
# endpoint, e.g. to look up its numeric endpoint ID.
!gcloud ai endpoints list \
  --region=us-central1 \
  --filter=display_name='service_request_classification'

In [None]:
# Re-attach to an endpoint that already exists, addressed by its full
# resource name: projects/<project>/locations/<region>/endpoints/<id>.
project_id = '<GCP Project Name>'
region = 'us-central1'
endpoint_id = "<Endpoint-Id>"  # Replace with your endpoint ID
endpoint = aiplatform.Endpoint(
    endpoint_name=f"projects/{project_id}/locations/{region}/endpoints/{endpoint_id}"
)

In [None]:
def deploy_model_with_dedicated_resources_sample(
    project,
    location,
    model_name: str,
    machine_type: str,
    endpoint: Optional[aiplatform.Endpoint] = None,
    deployed_model_display_name: Optional[str] = None,
    traffic_percentage: Optional[int] = 0,
    traffic_split: Optional[Dict[str, int]] = None,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    sync: bool = True,
):
    """Deploy a registered Vertex AI model onto a chosen machine type.

    Args:
        project: Project handed to ``aiplatform.init``.
        location: Region handed to ``aiplatform.init``.
        model_name: A fully-qualified model resource name or model ID.
            Example: "projects/123/locations/us-central1/models/456" or
            "456" when project and location are initialized or passed.
        machine_type: Machine type forwarded to ``Model.deploy``.
        endpoint: Endpoint forwarded to ``Model.deploy`` (``None`` by default).
        deployed_model_display_name: Forwarded unchanged to ``Model.deploy``.
        traffic_percentage: Forwarded unchanged to ``Model.deploy``.
        traffic_split: Forwarded unchanged to ``Model.deploy``.
        min_replica_count: Forwarded unchanged to ``Model.deploy``.
        max_replica_count: Forwarded unchanged to ``Model.deploy``.
        accelerator_type: Forwarded unchanged to ``Model.deploy``.
        accelerator_count: Forwarded unchanged to ``Model.deploy``.
        sync: Forwarded unchanged to ``Model.deploy``.

    Returns:
        The ``aiplatform.Model`` object that ``deploy`` was called on.
    """
    aiplatform.init(project=project, location=location)

    # explanation_metadata, explanation_parameters and metadata are
    # deliberately not forwarded. The explanation arguments should only be
    # provided for a custom trained model and not an AutoML model.
    deploy_options = dict(
        endpoint=endpoint,
        deployed_model_display_name=deployed_model_display_name,
        traffic_percentage=traffic_percentage,
        traffic_split=traffic_split,
        machine_type=machine_type,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        sync=sync,
    )

    registered_model = aiplatform.Model(model_name=model_name)
    registered_model.deploy(**deploy_options)

    # NOTE(review): presumably blocks until a deployment started with
    # sync=False has finished -- confirm against the SDK.
    registered_model.wait()

    print(registered_model.display_name)
    print(registered_model.resource_name)
    return registered_model


In [None]:
# Deploy the model onto the endpoint loaded earlier in this notebook. Every
# option not listed keeps the helper's default (one replica, no accelerator,
# sync=True).
output = deploy_model_with_dedicated_resources_sample(
    project='<GCP Project Name>',
    location='us-central1',
    model_name='<Model-Name>',
    machine_type='n2-standard-80',
    endpoint=endpoint,
)

In [None]:
# Online prediction against the dedicated endpoint host in us-central1.
# The request uses an explicit https:// scheme, carries a gcloud access token
# and targets the full /v1/projects/.../endpoints/...:predict resource path.
# The URL is quoted so the "<" / ">" of the placeholders are not parsed as
# shell redirections.
!curl -X POST \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: application/json" \
-d '{"instances": ["I love this!", "I hate this!"]}' \
"https://<Endpoint-Id>.us-central1-<Project-Id>.prediction.vertexai.goog/v1/projects/<Project-Id>/locations/us-central1/endpoints/<Endpoint-Id>:predict"


In [None]:
# Plain GET against the root of the dedicated endpoint host; no Authorization
# header is sent. The https:// scheme is spelled out because curl otherwise
# guesses the protocol when none is given.
# NOTE(review): presumably a reachability check for the host -- confirm intent.
!curl -X GET \
"https://<Endpoint-Id>.us-central1-<Project-Id>.prediction.vertexai.goog/"

In [None]:
# The endpoint in this section is created with dedicated_endpoint_enabled=True,
# so the SDK is told to send the request through the dedicated endpoint.
# NOTE(review): drop use_dedicated_endpoint if `endpoint` refers to a standard
# (shared) endpoint instead.
result = endpoint.predict(
    instances=["I love this!", "I hate this!"],
    use_dedicated_endpoint=True,
)

In [None]:
# Point the Vertex AI SDK at the target project and region.
aiplatform.init(project="<Project-Id>", location="us-central1")

# Create the endpoint. No dedicated_endpoint_enabled flag is passed, so this
# is the "standard" variant; the dedicated variant of this notebook passes
# dedicated_endpoint_enabled=True instead.
endpoint = aiplatform.Endpoint.create(
    display_name="service_request_classification_standard",
)

print(f"Endpoint created: {endpoint.resource_name}")

In [None]:
# List the endpoints in us-central1 whose display name matches the standard
# endpoint created in the cell just before this one, e.g. to look up its
# numeric endpoint ID. The filter uses that cell's "_standard" display name.
!gcloud ai endpoints list \
  --region=us-central1 \
  --filter=display_name='service_request_classification_standard'

In [None]:
# Re-attach to an endpoint that already exists, addressed by its full
# resource name: projects/<project>/locations/<region>/endpoints/<id>.
project_id = '<GCP Project Name>'
region = 'us-central1'
endpoint_id = "<Endpoint-Id>"  # Replace with your endpoint ID
endpoint = aiplatform.Endpoint(
    endpoint_name=f"projects/{project_id}/locations/{region}/endpoints/{endpoint_id}"
)

In [None]:
def deploy_model_with_dedicated_resources_sample(
    project,
    location,
    model_name: str,
    machine_type: str,
    endpoint: Optional[aiplatform.Endpoint] = None,
    deployed_model_display_name: Optional[str] = None,
    traffic_percentage: Optional[int] = 0,
    traffic_split: Optional[Dict[str, int]] = None,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    sync: bool = True,
):
    """Deploy a registered Vertex AI model onto a chosen machine type.

    Args:
        project: Project handed to ``aiplatform.init``.
        location: Region handed to ``aiplatform.init``.
        model_name: A fully-qualified model resource name or model ID.
            Example: "projects/123/locations/us-central1/models/456" or
            "456" when project and location are initialized or passed.
        machine_type: Machine type forwarded to ``Model.deploy``.
        endpoint: Endpoint forwarded to ``Model.deploy`` (``None`` by default).
        deployed_model_display_name: Forwarded unchanged to ``Model.deploy``.
        traffic_percentage: Forwarded unchanged to ``Model.deploy``.
        traffic_split: Forwarded unchanged to ``Model.deploy``.
        min_replica_count: Forwarded unchanged to ``Model.deploy``.
        max_replica_count: Forwarded unchanged to ``Model.deploy``.
        accelerator_type: Forwarded unchanged to ``Model.deploy``.
        accelerator_count: Forwarded unchanged to ``Model.deploy``.
        sync: Forwarded unchanged to ``Model.deploy``.

    Returns:
        The ``aiplatform.Model`` object that ``deploy`` was called on.
    """
    aiplatform.init(project=project, location=location)

    # explanation_metadata, explanation_parameters and metadata are
    # deliberately not forwarded. The explanation arguments should only be
    # provided for a custom trained model and not an AutoML model.
    deploy_options = dict(
        endpoint=endpoint,
        deployed_model_display_name=deployed_model_display_name,
        traffic_percentage=traffic_percentage,
        traffic_split=traffic_split,
        machine_type=machine_type,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        sync=sync,
    )

    registered_model = aiplatform.Model(model_name=model_name)
    registered_model.deploy(**deploy_options)

    # NOTE(review): presumably blocks until a deployment started with
    # sync=False has finished -- confirm against the SDK.
    registered_model.wait()

    print(registered_model.display_name)
    print(registered_model.resource_name)
    return registered_model


In [None]:
# Deploy the model onto the endpoint loaded earlier in this notebook. Every
# option not listed keeps the helper's default (one replica, no accelerator,
# sync=True).
output = deploy_model_with_dedicated_resources_sample(
    project='<GCP Project Name>',
    location='us-central1',
    model_name='<Model-Name>',
    machine_type='n2-standard-80',
    endpoint=endpoint,
)