In [None]:
# Enable IPython's autoreload extension so edited modules are re-imported
# before each cell runs (mode 2 = reload all modules).
%reload_ext autoreload
%autoreload 2

import os
import sys

# Put the parent of the current working directory first on the import path;
# presumably this is where the local `seq_rec` package lives (the config path
# used later is '../seq_rec/conf/').
sys.path.insert(0, '..')

import google.cloud.aiplatform as aip

import seq_rec.utils as utils
# NOTE(review): not referenced by name anywhere in this notebook; presumably
# imported for its import-time side effects (logging setup) — confirm.
import seq_rec.utils.custom_logging

In [None]:
# Relative path to the project's configuration directory.
HYDRA_CONFIG_PATH = '../seq_rec/conf/'
# Load the configuration object; every `cfg.env...` setting below is read from it.
cfg = utils.load_cfg(HYDRA_CONFIG_PATH)

In [None]:
# Config sub-trees read below (bound once instead of repeating the full path).
_gcp_cfg = cfg.env.gcp
_kubeflow_cfg = cfg.env.pipeline.kubeflow
_endpoint_cfg = _gcp_cfg.endpoint

# --- Project and model identity ---
PROJECT_ID = _gcp_cfg.project_id
MODEL_NAME = 'seq_rec'
MODEL_VERSION = "0.2.0"
# Version with the dots stripped, used inside the model display name.
MODEL_VERSION_NODOT = "".join(MODEL_VERSION.split("."))
MODEL_NAME_VERSION = f"{MODEL_NAME}_{MODEL_VERSION_NODOT}"

# --- Storage bucket settings (taken from the Kubeflow pipeline config) ---
BUCKET_LOCATION = _kubeflow_cfg.region
BUCKET_NAME = _kubeflow_cfg.bucket_name
BUCKET_URL = _kubeflow_cfg.bucket_url
BUCKET_FOLDER_DIR = _kubeflow_cfg.bucket_dir
# Location of the model artifacts for this version (dotted version in the path).
PATH_TO_MODEL_ARTIFACT_DIRECTORY = f"{BUCKET_URL}/{BUCKET_FOLDER_DIR}/{MODEL_VERSION}/model"

# --- Endpoint / serving settings ---
ENDPOINT_LOCATION = _endpoint_cfg.location
ENDPOINT_NAME = _endpoint_cfg.name
CONTAINER_IMAGE_URI = _endpoint_cfg.container_image_uri
ENDPOINT_MACHINE_TYPE = _endpoint_cfg.machine_type
ENDPOINT_MIN_REPLICA_COUNT = _endpoint_cfg.min_replica_count
ENDPOINT_MAX_REPLICA_COUNT = _endpoint_cfg.max_replica_count

# Select endpoint

## Create endpoint

In [None]:
# Set the default project and location for the Vertex AI SDK calls that follow.
aip.init(project=PROJECT_ID, location=ENDPOINT_LOCATION)

In [None]:
# Create a new Vertex AI endpoint with the configured display name.
# NOTE(review): every execution of this cell calls `create` again; confirm
# whether a second endpoint with the same display name is acceptable, or skip
# this cell when the endpoint already exists.
endpoint = aip.Endpoint.create(
    display_name=ENDPOINT_NAME,
    project=PROJECT_ID,
    location=ENDPOINT_LOCATION,
)

In [None]:
# Keep the endpoint's resource name (as opposed to its display name) and show it.
endpoint_id = endpoint.name
print(endpoint_id)

## Upload model to Vertex AI Models

In [None]:
# Ref: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/explainable_ai/sdk_custom_tabular_regression_online_explain.ipynb

# Register the model artifacts stored under PATH_TO_MODEL_ARTIFACT_DIRECTORY
# as a Vertex AI Model served by the configured container image.
model = aip.Model.upload(
    display_name=MODEL_NAME_VERSION,
    artifact_uri=PATH_TO_MODEL_ARTIFACT_DIRECTORY,
    serving_container_image_uri=CONTAINER_IMAGE_URI,
    sync=False
)

# The upload was started with sync=False, so block here until it has finished.
model.wait()

In [None]:
# Keep the uploaded model's resource name (as opposed to its display name) and show it.
model_id = model.name
print(model_id)

## Deploy the model to endpoint

In [None]:
%%time
TRAFFIC_SPLIT = {"0": 100}
DEPLOY_GPU = False
if DEPLOY_GPU:
    ACCELERATOR_COUNT = 1
else:
    ACCELERATOR_COUNT = 0

endpoint = model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=MODEL_NAME_VERSION,
    traffic_split=TRAFFIC_SPLIT,
    machine_type=ENDPOINT_MACHINE_TYPE,
    accelerator_type=DEPLOY_GPU,
    accelerator_count=ACCELERATOR_COUNT,
    min_replica_count=ENDPOINT_MIN_REPLICA_COUNT,
    max_replica_count=ENDPOINT_MAX_REPLICA_COUNT,
)


## Test the deployed model

In [None]:
# One example request for a smoke test of the deployed model.
# NOTE(review): the "<EXAMPLE_...>" values are placeholders — replace them with
# real IDs / search terms before running. The recency values are sent as
# strings; presumably that is what the serving signature expects — confirm.
instances = [
    {
        "context_merchants": ["<EXAMPLE_MERCHANT_ID>"],
        "context_search_terms": ["<EXAMPLE_SEARCH_TERM>"],
        "context_merchants_time_recency": ["1"],
        "context_search_terms_time_recency": ["1"],
        "recent_txn_merchants": ["<EXAMPLE_MERCHANT_ID>"],
        "recent_txn_time_recency": ["1"]
    }
]
# Send the request to the endpoint and display the response as the cell output.
prediction = endpoint.predict(instances=instances)
prediction

# Deploy new model to existing endpoint

In [None]:
# Look up the existing endpoint by the configured display name (the same name
# it was created with) rather than a hard-coded string, so this cell keeps
# working when the configured endpoint name changes.
matching_endpoints = [
    candidate
    for candidate in aip.Endpoint.list()
    if candidate.display_name == ENDPOINT_NAME
]
# Fail with an explicit message instead of a bare IndexError when nothing matches.
if not matching_endpoints:
    raise LookupError(f"No Vertex AI endpoint with display name {ENDPOINT_NAME!r} was found")
# If several endpoints share the display name, the first one listed is used.
endpoint = matching_endpoints[0]
print(endpoint)

In [None]:
# Build the display name from its base parts instead of appending to the
# current value of MODEL_NAME_VERSION: the self-referential form added another
# "_new" suffix every time this cell was re-run. On a first top-to-bottom run
# the resulting name is unchanged.
MODEL_NAME_VERSION = f"{MODEL_NAME}_{MODEL_VERSION_NODOT}_new"

# Register the model artifacts as a new Vertex AI Model under the "_new" name.
model = aip.Model.upload(
    display_name=MODEL_NAME_VERSION,
    artifact_uri=PATH_TO_MODEL_ARTIFACT_DIRECTORY,
    serving_container_image_uri=CONTAINER_IMAGE_URI,
    sync=False
)

# The upload was started with sync=False, so block here until it has finished.
model.wait()

In [None]:
%%time
TRAFFIC_SPLIT = {"0": 100}
DEPLOY_GPU = False
if DEPLOY_GPU:
    ACCELERATOR_COUNT = 1
else:
    ACCELERATOR_COUNT = 0

endpoint = model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=MODEL_NAME_VERSION,
    traffic_split=TRAFFIC_SPLIT,
    machine_type=ENDPOINT_MACHINE_TYPE,
    accelerator_type=DEPLOY_GPU,
    accelerator_count=ACCELERATOR_COUNT,
    min_replica_count=ENDPOINT_MIN_REPLICA_COUNT,
    max_replica_count=ENDPOINT_MAX_REPLICA_COUNT,
)


# Undeploy model

In [None]:
# Fetch the endpoint whose display name matches the configured one; the first
# match in listing order is used.
endpoint = list(
    filter(
        lambda candidate: candidate.display_name == ENDPOINT_NAME,
        aip.Endpoint.list(),
    )
)[0]

In [None]:
# Display the models currently deployed on the endpoint.
endpoint.list_models()

In [None]:
# Take the first deployed-model entry for inspection.
# Note: this rebinds `model`, which earlier cells used for the uploaded aip.Model.
model = endpoint.list_models()[0]

In [None]:
# Display the `model` field of the deployed-model entry (the same field the
# cleanup cell passes to aip.Model to look up and delete the underlying Model).
model.model

In [None]:
# Display the endpoint's traffic split: a mapping of deployed-model ID to its traffic share.
endpoint.traffic_split

In [None]:
# IDs of the deployed models that currently receive a non-zero share of traffic.
deployed_model_ids_in_use = {
    serving_id
    for serving_id, traffic_share in endpoint.traffic_split.items()
    if traffic_share > 0
}

# Undeploy every deployed model that gets no traffic, then delete the Model
# resource it was deployed from.
for deployed_model in endpoint.list_models():
    if deployed_model.id in deployed_model_ids_in_use:
        # Still serving traffic: leave it alone.
        continue
    print(f"Undeploying DeployedModel {deployed_model.id} from endpoint {endpoint.name}...")
    endpoint.undeploy(deployed_model.id)
    model_id = deployed_model.model
    model_to_delete = aip.Model(model_id)
    print(f"Deleting Model {model_id}...")
    model_to_delete.delete()