In [None]:
import os
import glob

from google.cloud import storage
import google.cloud.aiplatform as aip

In [None]:
# Google Cloud project handed to the Vertex AI SDK.
PROJECT_ID = 'seq-rec-gcp-project-id'

# Model identifier; it also names the local export directory and the GCS folder.
MODEL_NAME = "seq-rec-model-v0"
SAVED_MODEL_PATH = "models/" + MODEL_NAME

# Cloud Storage destination for the exported model artifacts.
BUCKET_LOCATION = 'ASIA-SOUTHEAST1'
BUCKET_NAME = 'recsys-pipeline'
BUCKET_FOLDER_DIR = "seq-rec/" + MODEL_NAME

In [None]:
def upload_local_directory_to_gcs(local_path, bucket_name, gcs_path):
    """Upload the contents of a local directory tree to a Cloud Storage bucket.

    Every file found below ``local_path`` is uploaded to ``bucket_name`` under
    the object-name prefix ``gcs_path``, keeping the relative directory layout.
    Entries whose name starts with a dot are skipped because the ``*`` glob
    pattern does not match them.

    Args:
        local_path: Path of an existing local directory. A trailing path
            separator is accepted.
        bucket_name: Name of the destination bucket.
        gcs_path: Object-name prefix (the "folder") inside the bucket.

    Raises:
        AssertionError: If ``local_path``, or a non-file entry found below
            it, is not a directory.
    """
    # Object names always use "/" regardless of the local OS separator.
    import posixpath

    # Validate locally first so a bad path fails before any network call.
    assert os.path.isdir(local_path), f"not a directory: {local_path!r}"

    # One client and one bucket lookup for the whole tree.
    gcs_client = storage.Client()
    bucket = gcs_client.get_bucket(bucket_name)

    # Iterative walk: each item is (local directory, matching object prefix).
    pending = [(local_path, gcs_path)]
    while pending:
        local_dir, remote_dir = pending.pop()
        # Escape the directory so glob metacharacters in it are taken literally.
        pattern = os.path.join(glob.escape(local_dir), '*')
        for entry in glob.glob(pattern):
            # Use the basename rather than slicing by len(local_path), which
            # breaks when the directory path carries a trailing separator.
            remote_path = posixpath.join(remote_dir, os.path.basename(entry))
            if os.path.isfile(entry):
                bucket.blob(remote_path).upload_from_filename(entry)
            else:
                assert os.path.isdir(entry), f"not a directory: {entry!r}"
                pending.append((entry, remote_path))

In [None]:
# Copy the exported model directory into the bucket folder configured above.
upload_local_directory_to_gcs(
    local_path=SAVED_MODEL_PATH,
    bucket_name=BUCKET_NAME,
    gcs_path=BUCKET_FOLDER_DIR,
)

# Create endpoint

Ref: https://cloud.google.com/vertex-ai/docs/predictions/deploy-model-api#aiplatform_create_endpoint_sample-gcloud

In [None]:
# Deployment settings. Every value is a string so the whole mapping can be
# copied into the process environment below.
ENDPOINT_VARS = {
    "ENDPOINT_LOCATION": "asia-southeast1",
    "ENDPOINT_NAME": "seq-rec-model",
    "ENDPOINT_VERSION": "v0",
    "MODEL_NAME": MODEL_NAME,
    "PATH_TO_MODEL_ARTIFACT_DIRECTORY": f"gs://{BUCKET_NAME}/{BUCKET_FOLDER_DIR}",
    "CONTAINER_IMAGE_URI": "asia-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-7:latest",
    # Alternative serving image (presumably a custom TF Serving build; confirm before use):
    # "CONTAINER_IMAGE_URI": "asia.gcr.io/seq-rec-gcp-project-id/tf-serving-scann",
    "ENDPOINT_MACHINE_TYPE": "n1-standard-2",
    "ENDPOINT_MIN_REPLICA_COUNT": "1",
    "ENDPOINT_MAX_REPLICA_COUNT": "1",
    "BUCKET_LOCATION": BUCKET_LOCATION,
    "BUCKET_NAME": BUCKET_NAME,
    "BUCKET_FOLDER_DIR": BUCKET_FOLDER_DIR,
}

# Mirror the settings into environment variables
# (presumably for shell commands run from the notebook; no visible cell reads them).
os.environ.update(ENDPOINT_VARS)

## Upload model to Vertex AI Models

In [None]:
# Initialise the Vertex AI SDK with the project and the endpoint region.
aip.init(
    location=ENDPOINT_VARS["ENDPOINT_LOCATION"],
    project=PROJECT_ID,
)

In [None]:
# Ref: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/explainable_ai/sdk_custom_tabular_regression_online_explain.ipynb

# Register the artifacts stored in GCS as a Vertex AI Model served by the
# configured prediction container.
model = aip.Model.upload(
    serving_container_image_uri=ENDPOINT_VARS["CONTAINER_IMAGE_URI"],
    artifact_uri=ENDPOINT_VARS["PATH_TO_MODEL_ARTIFACT_DIRECTORY"],
    display_name=ENDPOINT_VARS["MODEL_NAME"],
    sync=False,
)

# The upload was started with sync=False, so wait for it to finish here.
model.wait()

## Deploy the model to endpoint

In [None]:
%%time
TRAFFIC_SPLIT = {"0": 100}
DEPLOY_GPU = False

endpoint = model.deploy(
    deployed_model_display_name=ENDPOINT_VARS['MODEL_NAME'],
    traffic_split=TRAFFIC_SPLIT,
    machine_type=ENDPOINT_VARS['ENDPOINT_MACHINE_TYPE'],
    accelerator_type=DEPLOY_GPU,
    accelerator_count=0,
    min_replica_count=int(ENDPOINT_VARS['ENDPOINT_MIN_REPLICA_COUNT']),
    max_replica_count=int(ENDPOINT_VARS['ENDPOINT_MAX_REPLICA_COUNT']),
)


## Test the deployed model

In [None]:
%%time
instances = [
    {
        "context_merchants": ["<EXAMPLE_MERCHANT_ID>"],
        "context_search_terms": ["<EXAMPLE_SEARCH_TERM>"],
        "context_merchants_time_recency": ["1"],
        "context_search_terms_time_recency": ["1"]
    }
]
prediction = endpoint.predict(instances=instances)
prediction