# Import packages

In [None]:
import comet_ml
comet_ml.init(project_name='seq-rec')
from comet_ml import Experiment

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os
from six.moves import urllib
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs

from google.cloud import bigquery
from google.api_core.exceptions import GoogleAPIError
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from tensorflow_io.bigquery import BigQueryClient
from tensorflow_io.bigquery import BigQueryReadSession

In [None]:
# Set to False to skip the GPU sanity check below.
USE_GPU = True
if USE_GPU:
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    device_name = tf.test.gpu_device_name()
    # Abort unless the device TensorFlow reports is exactly the first GPU.
    if device_name == '/device:GPU:0':
        print('Found GPU at: {}'.format(device_name))
    else:
        raise SystemError('GPU device not found')

# Configure auth to GCP resources

In [None]:
# GCP project id used by the rest of the notebook.
PROJECT_ID = "seq-rec-gcp-project-id"
# Expose the project id through the environment under both variable names.
os.environ.update({'PROJECT_ID': PROJECT_ID, 'GCLOUD_PROJECT': PROJECT_ID})

In [None]:
%%bash

gcloud config set project ${PROJECT_ID}
env GCLOUD_PROJECT=${PROJECT_ID}

# Download input data

In [None]:
LOCATION = 'us'

# BigQuery dataset and the tables holding the train / validation / test splits.
DATASET_ID = 'data_science_dbt'
TRAINING_TABLE_ID = 'fct_seq_rec_pad_train_30d'
EVAL_TABLE_ID = 'fct_seq_rec_pad_eval_val'
TEST_TABLE_ID = 'fct_seq_rec_pad_eval_test'

# Table columns in order, declared as (name, BigQuery type) pairs.
CSV_SCHEMA = [
    bigquery.SchemaField(column_name, column_type)
    for column_name, column_type in (
        ("time", "TIMESTAMP"),
        ("user_id", "STRING"),
        ("event_name", "STRING"),
        ("merchant_id", "STRING"),
        ("prev_search_term_list", "STRING"),
        ("prev_event_ruid_list", "STRING"),
        ("prev_search_term_time_diff_seconds_list", "STRING"),
        ("prev_event_time_diff_seconds_list", "STRING"),
    )
]

# Schema columns that read_bigquery leaves out of the selected fields.
UNUSED_COLUMNS = ['time']

def transform_row(row_dict):
    """Return a copy of ``row_dict`` with every string tensor stripped.

    Args:
        row_dict: Mapping of column name to tensor for a single row.

    Returns:
        A new dict with the same keys. Values whose ``dtype`` compares equal
        to ``'string'`` are passed through ``tf.strings.strip``; all other
        values are returned unchanged.
    """
    features_dict = {}
    for column, tensor in row_dict.items():
        is_text = tensor.dtype == 'string'
        features_dict[column] = tf.strings.strip(tensor) if is_text else tensor
    return features_dict

def read_bigquery(table_name):
    """Read a BigQuery table into a dataset of stripped feature dicts.

    The table is looked up in ``DATASET_ID`` of project ``PROJECT_ID``. Every
    column of ``CSV_SCHEMA`` that is not listed in ``UNUSED_COLUMNS`` is
    requested; FLOAT64 columns are read as doubles and everything else as
    strings.

    Args:
        table_name: Id of the table inside ``DATASET_ID``.

    Returns:
        The rows read in parallel from the session, mapped through
        ``transform_row``.
    """
    used_fields = [field for field in CSV_SCHEMA if field.name not in UNUSED_COLUMNS]
    selected_fields = [field.name for field in used_fields]
    output_types = [
        dtypes.double if field.field_type == 'FLOAT64' else dtypes.string
        for field in used_fields
    ]

    client = BigQueryClient()
    read_session = client.read_session(
        "projects/" + PROJECT_ID,
        PROJECT_ID, table_name, DATASET_ID,
        selected_fields,
        output_types,
        requested_streams=2)

    return read_session.parallel_read_rows().map(transform_row)


In [None]:
# NOTE(review): BATCH_SIZE is not referenced again in the visible notebook; the
# batching cell further down uses a separate `batch_size` variable — confirm
# which value is intended.
BATCH_SIZE = 32
RANDOM_SEED = 13
SHUFFLE_BUFFER_SIZE = 1_000_000

# Seed TensorFlow's global random generator before the shuffle below is built.
tf.random.set_seed(RANDOM_SEED)

# Only the training split is shuffled; reshuffle_each_iteration=False asks for
# the same shuffled order on every pass over the dataset.
training_ds = read_bigquery(TRAINING_TABLE_ID).shuffle(SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=False)
eval_ds = read_bigquery(EVAL_TABLE_ID)
test_ds = read_bigquery(TEST_TABLE_ID)

In [None]:
# Pull the id columns out of the training data in batches of up to 1,000,000 rows.
merchant_ids = training_ds.batch(1_000_000).map(lambda x: x["merchant_id"])
# `user_ids` is only consumed by the commented-out line at the end of this cell.
user_ids = training_ds.batch(1_000_000).map(lambda x: x["user_id"])

# Distinct merchant ids seen in the training split; the model cells use this
# array as the fixed vocabulary of their StringLookup layers.
unique_merchant_ids = np.unique(np.concatenate(list(merchant_ids)))
# unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [None]:
def calc_sample_weight(event_name: str):
    """Return the training weight for an interaction of type ``event_name``.

    ``'Transaction_Success'`` events weigh 5; ``'View_Merchant'`` and every
    other event name weigh 1.
    """
    if event_name == 'Transaction_Success':
        return 5
    # 'View_Merchant' shares the baseline weight with all remaining events.
    return 1

def transform_train_data(ds):
    """Map raw rows to the feature dict consumed by the model.

    The four ``prev_*_list`` columns are split on ``'|'``; ``merchant_id``
    becomes the retrieval target and ``event_name`` is turned into a sample
    weight via ``calc_sample_weight``.

    Args:
        ds: Dataset of row dicts as produced by ``read_bigquery``.

    Returns:
        ``ds`` mapped to dicts with the keys ``target_merchant_id``,
        ``context_search_terms``, ``context_search_terms_len``,
        ``context_merchants``, ``context_search_terms_time_recency``,
        ``context_merchants_time_recency``, ``user_id`` and ``sample_weight``.
    """
    def _split_list(encoded):
        # List-valued columns arrive as a single '|'-separated string.
        return tf.strings.split(encoded, sep='|')

    def _to_model_features(row):
        search_term_tokens = _split_list(row['prev_search_term_list'])
        return {
            "target_merchant_id": row["merchant_id"],
            "context_search_terms": search_term_tokens,
            "context_search_terms_len": len(search_term_tokens),
            "context_merchants": _split_list(row['prev_event_ruid_list']),
            "context_search_terms_time_recency": _split_list(row['prev_search_term_time_diff_seconds_list']),
            "context_merchants_time_recency": _split_list(row['prev_event_time_diff_seconds_list']),
            "user_id": row["user_id"],
            "sample_weight": calc_sample_weight(row['event_name']),
        }

    return ds.map(_to_model_features)

training_ds_prep = transform_train_data(training_ds)
eval_ds_prep = transform_train_data(eval_ds)
test_ds_prep = transform_train_data(test_ds)

In [None]:
search_terms = training_ds_prep.batch(1_000_000).map(lambda x: x['context_search_terms'])

In [None]:
for item in training_ds_prep.take(1):
    print(item)

# Modeling

In [None]:
embedding_dimension = 64
max_search_term_tokens = 10000
# Time recency is bucketed when the query is built: 1800 s divided into 30 s
# steps, which yields the bucket ids 0..60 (61 values).
time_recency_num_buckets = 61
# Bucket ids as strings, used as the vocabulary of the recency lookups.
time_recency_buckets = np.array([str(bucket) for bucket in range(time_recency_num_buckets)])

## The candidate tower

In [None]:
class CandidateModel(tf.keras.Model):
    """Candidate tower: looks up merchant-id strings and embeds them.

    Uses the notebook globals ``unique_merchant_ids`` (lookup vocabulary) and
    ``embedding_dimension`` (embedding width).
    """

    def __init__(self):
        super().__init__()

        merchant_id_input = tf.keras.Input(shape=[None,], dtype=tf.string)
        id_lookup = tf.keras.layers.StringLookup(vocabulary=unique_merchant_ids, mask_token=None)
        # One row more than the vocabulary — presumably for the lookup's
        # out-of-vocabulary index.
        id_embedding = tf.keras.layers.Embedding(len(unique_merchant_ids) + 1, embedding_dimension)
        embedded_ids = id_embedding(id_lookup(merchant_id_input))

        self.merchant_embedding = tf.keras.Model(
            [merchant_id_input], embedded_ids, name='target_embedding'
        )

    def call(self, merchant_ids):
        """Return the embeddings of ``merchant_ids``."""
        return self.merchant_embedding(merchant_ids)

In [None]:
# Test output given an input
candidate_model = CandidateModel()
candidate_model(np.array([[b'<EXAMPLE_MERCHANT_ID>']]))

## The query tower

In [None]:
class QueryModel(tf.keras.Model):
    """Query tower: encodes a user's recent merchants and search terms.

    ``__init__`` builds two functional sub-models, one over the merchant
    context and one over the search-term context. Each looks up its string
    tokens and their time-recency buckets, embeds both, concatenates the two
    embeddings along axis 2 and feeds the result through a GRU. ``call``
    concatenates the two GRU outputs along axis 1.

    Relies on the notebook globals ``unique_merchant_ids``,
    ``time_recency_buckets``, ``search_terms``, ``max_search_term_tokens`` and
    ``embedding_dimension``.

    NOTE(review): later cells in this notebook reach into the sub-models by
    position (``.layers[2]`` / ``.layers[4]``), so reordering or inserting
    layers here will presumably shift those indices — re-check them after any
    change.
    """
    def __init__(self):
        super().__init__()

        # Context Merchants
        context_merchants_inputs = tf.keras.Input(shape=[None,], dtype=tf.string)
        # Setting mask_token='NULL' here triggered an indices error. It is not
        # needed anyway: with a fixed vocabulary, NULL is already left out.
        x = tf.keras.layers.StringLookup(vocabulary=unique_merchant_ids, mask_token=None)(context_merchants_inputs)
        merchant_embedding = tf.keras.layers.Embedding(input_dim=len(unique_merchant_ids) + 1, output_dim=embedding_dimension)(x)
        
        # Recency bucket of each context merchant, looked up against the fixed bucket vocabulary.
        context_merchants_time_recency_inputs = tf.keras.Input(shape=[None,], dtype=tf.string)
        x = tf.keras.layers.StringLookup(vocabulary=time_recency_buckets, mask_token=None)(context_merchants_time_recency_inputs)
        merchant_recency_embedding = tf.keras.layers.Embedding(input_dim=len(time_recency_buckets) + 1, output_dim=embedding_dimension)(x)
        
        # Join merchant and recency embeddings per position, then summarise the sequence with a GRU.
        merchant_features_embedding = tf.concat([merchant_embedding, merchant_recency_embedding], axis=2)
        context_merchants_outputs = tf.keras.layers.GRU(embedding_dimension)(merchant_features_embedding)
        self.context_merchants_embedding = tf.keras.Model([context_merchants_inputs, context_merchants_time_recency_inputs], context_merchants_outputs, name='context_merchants_embedding')

        # Context Search Terms
        context_search_terms_inputs = tf.keras.Input(shape=[None,], dtype=tf.string)
        # Unlike the merchant lookup, this vocabulary is learned from the data
        # via adapt() and capped at max_search_term_tokens.
        # NOTE(review): the Embedding below is created without mask_zero, so
        # the 'NULL' mask token presumably does not mask GRU timesteps —
        # confirm whether that is intended.
        self.search_term_string_lookup_layer = tf.keras.layers.StringLookup(
            max_tokens=max_search_term_tokens,
            mask_token='NULL'
        )
        self.search_term_string_lookup_layer.adapt(search_terms)
        x = self.search_term_string_lookup_layer(context_search_terms_inputs)
        search_term_embedding = tf.keras.layers.Embedding(input_dim=self.search_term_string_lookup_layer.vocabulary_size(), output_dim=embedding_dimension)(x)

        # Recency bucket of each context search term, same bucket vocabulary as for merchants.
        context_search_terms_time_recency_inputs = tf.keras.Input(shape=[None,], dtype=tf.string)
        x = tf.keras.layers.StringLookup(vocabulary=time_recency_buckets, mask_token=None)(context_search_terms_time_recency_inputs)
        search_term_recency_embedding = tf.keras.layers.Embedding(input_dim=len(time_recency_buckets) + 1, output_dim=embedding_dimension)(x)
        
        search_term_features_embedding = tf.concat([search_term_embedding, search_term_recency_embedding], axis=2)
        context_search_terms_outputs = tf.keras.layers.GRU(embedding_dimension)(search_term_features_embedding)
        self.context_search_terms_embedding = tf.keras.Model([context_search_terms_inputs, context_search_terms_time_recency_inputs], context_search_terms_outputs, name='context_search_terms_embedding')
        
        # Adding user_id introduces huge overfit. Need to know how to control this overfit before adding this.
        # user_id_input = tf.keras.Input(shape=[None,], dtype=tf.string)
        # x = tf.keras.layers.StringLookup(vocabulary=unique_user_ids, mask_token=None)(user_id_input)
        # user_id_output = tf.keras.layers.Embedding(input_dim=len(unique_user_ids) + 1, output_dim=embedding_dimension)(x)
        # self.user_embedding = tf.keras.Model(user_id_input, user_id_output, name='user_id_embedding')

    def call(self, inputs):
        """Return the search-term and merchant context encodings, concatenated.

        Args:
            inputs: Dict providing ``context_search_terms``,
                ``context_search_terms_time_recency``, ``context_merchants``
                and ``context_merchants_time_recency``.
        """
        return tf.concat([
            self.context_search_terms_embedding([inputs['context_search_terms'], inputs['context_search_terms_time_recency']]),
            self.context_merchants_embedding([inputs['context_merchants'], inputs['context_merchants_time_recency']]),
            # self.user_embedding(inputs['user_id'])
        ], axis=1)

## The full model

In [None]:
class SequentialRecModel(tfrs.models.Model):
    """Retrieval model pairing the query tower with the candidate tower.

    Each tower is followed by a Dense projection to ``embedding_dimension``.
    The retrieval task reports FactorizedTopK metrics computed against the
    embeddings of every id in ``unique_merchant_ids``.
    """

    def __init__(self):
        super().__init__()

        query_layers = [QueryModel(), tf.keras.layers.Dense(embedding_dimension)]
        self.query_model = tf.keras.Sequential(query_layers)

        candidate_layers = [CandidateModel(), tf.keras.layers.Dense(embedding_dimension)]
        self.candidate_model = tf.keras.Sequential(candidate_layers)

        # Candidate corpus for the top-k metrics: all known merchants, embedded in batches of 128.
        all_candidates = (
            tf.data.Dataset.from_tensor_slices(unique_merchant_ids)
            .batch(128)
            .map(self.candidate_model)
        )
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(candidates=all_candidates),
            # batch_metrics=[tf.keras.metrics.AUC]  # Can not use because missing y_pred error
        )

    def compute_loss(self, features, training=False):
        """Return the retrieval-task loss for one batch of ``features``.

        Args:
            features: Dict holding the four context features, the
                ``target_merchant_id`` and the per-example ``sample_weight``.
            training: Accepted for API compatibility; not used by this method.
        """
        query_feature_keys = (
            "context_search_terms",
            "context_merchants",
            "context_merchants_time_recency",
            "context_search_terms_time_recency",
            # "user_id",
        )
        query_inputs = {key: features[key] for key in query_feature_keys}

        query_embeddings = self.query_model(query_inputs)
        candidate_embeddings = self.candidate_model(features['target_merchant_id'])

        return self.task(query_embeddings, candidate_embeddings, features['sample_weight'])

In [None]:
# Training configuration.
epochs = 30
batch_size = 256
input_data_window = '30d'

optimizer = tf.keras.optimizers.Adam(0.003)
# Plain-dict view of the optimizer settings, suitable for experiment logging.
optimizer_log_fmt = optimizer.get_config()

# Logged for reference only; the weights actually applied come from calc_sample_weight.
sample_weight = {"View_Merchant": 1, "Transaction_Success": 5}

# Everything that gets recorded with the experiment.
params = dict(
    batch_size=batch_size,
    epochs=epochs,
    optimizer=optimizer_log_fmt,
    embedding_dimension=embedding_dimension,
    max_search_term_tokens=max_search_term_tokens,
    input_data_window=input_data_window,
    sample_weight=sample_weight,
)

In [None]:
model = SequentialRecModel()
model.compile(optimizer=optimizer)

In [None]:
model.query_model.layers[0].context_merchants_embedding.summary()

In [None]:
model.query_model.layers[0].context_search_terms_embedding.summary()

# Fitting and evaluating

In [None]:
# Cache the prepared training examples first and shuffle/batch afterwards, so
# that every epoch draws a fresh shuffle. With cache() as the last stage the
# dataset would replay the first epoch's order and batches on every later
# epoch, which makes the shuffle ineffective after epoch one.
cached_train = training_ds_prep.cache().shuffle(1_000_000).batch(batch_size)
# Evaluation and test data need no shuffling, so the finished batches are cached.
cached_eval = eval_ds_prep.batch(batch_size).cache()
cached_test = test_ds_prep.batch(batch_size).cache()

## Training

In [None]:
# Create an experiment with your API key
experiment = Experiment(
    auto_metric_logging=True,
    auto_param_logging=False,
    auto_histogram_weight_logging=True,
    auto_histogram_gradient_logging=True,
    auto_histogram_activation_logging=True,
    auto_histogram_tensorboard_logging=True
)

In [None]:
callback_early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_factorized_top_k/top_10_categorical_accuracy',
    min_delta=0.001,
    patience=3,
    verbose=1,
    mode='auto',
    baseline=None,
    restore_best_weights=True
)

In [None]:
model.fit(cached_train, epochs=epochs, callbacks=[callback_early_stopping], validation_data=cached_eval)

In [None]:
metrics_test = model.evaluate(cached_test, return_dict=True)
print(metrics_test)

In [None]:
%%time
with experiment.train():
    model.fit(cached_train, epochs=epochs, callbacks=[callback_early_stopping], validation_data=cached_eval)

In [None]:
with experiment.test():
    metrics_test = model.evaluate(cached_test, return_dict=True)
    print(metrics_test)
    experiment.log_metrics(metrics_test)

In [None]:
experiment.log_parameters(params)

In [None]:
experiment.end()

# Save embeddings

In [None]:
def save_embeddings(embedding_keys, embedding_values, name):
    """Write an embedding table and its labels to ``logs/embeddings/<name>/``.

    Two artefacts are produced: ``metadata.tsv`` with one key per line, and a
    TensorFlow checkpoint (prefix ``embedding.ckpt``) holding the values under
    the name ``embedding``.

    Args:
        embedding_keys: Iterable of labels, one per embedding row.
        embedding_values: Embedding matrix, anything ``tf.Variable`` accepts.
        name: Sub-directory name under ``logs/embeddings/``.

    Returns:
        True once both artefacts have been written.

    Raises:
        FileExistsError: If the target directory already exists, so that an
            earlier export is never overwritten.
    """
    log_dir = f'logs/embeddings/{name}/'
    try:
        # Create and check in one step; a separate exists() test could race
        # with another process creating the same directory.
        os.makedirs(log_dir)
    except FileExistsError:
        raise FileExistsError(f'{log_dir} already exists') from None

    # Keys may contain non-ASCII text, so do not depend on the locale's
    # default encoding.
    with open(os.path.join(log_dir, 'metadata.tsv'), "w", encoding="utf-8") as f:
        f.writelines(f"{key}\n" for key in embedding_keys)

    weights = tf.Variable(embedding_values)
    checkpoint = tf.train.Checkpoint(embedding=weights)
    checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

    return True

In [None]:
search_term_weights = model.query_model.layers[0].context_search_terms_embedding.layers[4].get_weights()[0]
search_term_keys = model.query_model.layers[0].context_search_terms_embedding.layers[2].get_vocabulary()
save_embeddings(search_term_keys, search_term_weights, name='search_terms')

In [None]:
merchant_weights = model.query_model.layers[0].context_merchants_embedding.layers[4].get_weights()[0]
merchant_keys = model.query_model.layers[0].context_merchants_embedding.layers[2].get_vocabulary()
save_embeddings(merchant_keys, merchant_weights, name='merchants')

# Test predictions

In [None]:
# TODO: Find how to deploy ScaNN to tensorflow serving
# index = tfrs.layers.factorized_top_k.ScaNN(model.query_model)

index = tfrs.layers.factorized_top_k.BruteForce(model.query_model)
index.index_from_dataset(
  tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(unique_merchant_ids).batch(100), tf.data.Dataset.from_tensor_slices(unique_merchant_ids).batch(100).map(model.candidate_model)))
)

input = {
    'context_merchants': np.array([[b'<EXAMPLE_MERCHANT_ID>']]),
    'context_search_terms': np.array([[b'<EXAMPLE_SEARCH_TERM>']]),
    "context_merchants_time_recency": np.array([[b'1']]),
    "context_search_terms_time_recency": np.array([[b'1']])
}

_, recommendations = index(input)
print(f"Recommendations: {recommendations[0, :10]}")

# Export for serving

In [None]:
MODEL_NAME = 'seq-rec-model-v0'
SAVED_MODEL_PATH = f'models/{MODEL_NAME}'

In [None]:
signature_dict = {
    'context_merchants': tf.TensorSpec(shape=[None, 1], dtype=tf.string, name='context_merchants'),
    'context_search_terms': tf.TensorSpec(shape=[None, 1], dtype=tf.string, name='context_search_terms'),
    'context_merchants_time_recency': tf.TensorSpec(shape=[None, 1], dtype=tf.string, name='context_merchants_time_recency'),
    'context_search_terms_time_recency': tf.TensorSpec(shape=[None, 1], dtype=tf.string, name='context_search_terms_time_recency'),
}

@tf.function(input_signature=[signature_dict])
def rec_at_10(data):
    """Serving function: query ``index`` with k=10.

    Returns a dict with the first element of the index result under
    ``scores`` and the second under ``merchant_id``.
    """
    top_scores, top_ids = index(data, k=10)
    return {"scores": top_scores, "merchant_id": top_ids}

@tf.function(input_signature=[signature_dict])
def rec_at_100(data):
    """Serving function: query ``index`` with k=100.

    Returns a dict with the first element of the index result under
    ``scores`` and the second under ``merchant_id``.
    """
    top_scores, top_ids = index(data, k=100)
    return {"scores": top_scores, "merchant_id": top_ids}


In [None]:
rec_at_100(input)

In [None]:
tf.saved_model.save(
    index,
    SAVED_MODEL_PATH,
    signatures={
        "serving_default": rec_at_100,
        "k_10": rec_at_10,
        "k_100": rec_at_100,
    }
)

In [None]:
loaded = tf.saved_model.load(SAVED_MODEL_PATH)
scores, titles = loaded(input)
print(f"Recommendations: {titles[0][:10]}")

In [None]:
loaded.signatures['serving_default'](**input)

In [None]:
loaded.signatures['k_100'](**input)

## Analyze the signature to call the model

In [None]:
os.environ['MODEL_EXPORT_PATH'] = SAVED_MODEL_PATH

In [None]:
%%bash

saved_model_cli show --dir ${MODEL_EXPORT_PATH} \
     --tag_set serve --signature_def serving_default

# Upload model to GCS

In [None]:
import glob
from google.cloud import storage

def upload_local_directory_to_gcs(local_path, bucket_name, gcs_path):
    """Upload every file below ``local_path`` to a GCS bucket, keeping the layout.

    A file at ``<local_path>/a/b.txt`` is stored as ``<gcs_path>/a/b.txt``.
    The whole tree is walked with a single storage client. Files whose names
    start with a dot are uploaded as well.

    Args:
        local_path: Local directory to upload.
        bucket_name: Destination bucket, as accepted by
            ``storage.Client.get_bucket``.
        gcs_path: Object-name prefix under which the files are stored.

    Raises:
        NotADirectoryError: If ``local_path`` is not an existing directory.
            Checked before any client is created, and not stripped under
            ``python -O`` as an ``assert`` would be.
    """
    import posixpath  # GCS object names always use '/', whatever the local OS.

    if not os.path.isdir(local_path):
        raise NotADirectoryError(f'{local_path} is not a directory')

    bucket = storage.Client().get_bucket(bucket_name)

    for dir_path, _dir_names, file_names in os.walk(local_path):
        relative_dir = os.path.relpath(dir_path, local_path)
        # Files directly in local_path get no extra prefix component.
        prefix_parts = [] if relative_dir == os.curdir else relative_dir.split(os.sep)
        for file_name in file_names:
            remote_path = posixpath.join(gcs_path, *prefix_parts, file_name)
            blob = bucket.blob(remote_path)
            blob.upload_from_filename(os.path.join(dir_path, file_name))

In [None]:
BUCKET_LOCATION = "ASIA-SOUTHEAST1"
BUCKET_NAME = "recsys-pipeline"
BUCKET_FOLDER_DIR = F"seq-rec/{MODEL_NAME}"

In [None]:
upload_local_directory_to_gcs(SAVED_MODEL_PATH, BUCKET_NAME , BUCKET_FOLDER_DIR)

# Create endpoints

Ref: https://cloud.google.com/vertex-ai/docs/predictions/deploy-model-api#aiplatform_create_endpoint_sample-gcloud

In [None]:
# Deployment settings, kept as strings so they can be exported to the environment.
ENDPOINT_VARS = {
    "ENDPOINT_LOCATION": "asia-southeast1",
    "ENDPOINT_NAME": "seq-rec-model",
    "ENDPOINT_VERSION": "v0",
    "MODEL_NAME": MODEL_NAME,
    "PATH_TO_MODEL_ARTIFACT_DIRECTORY": f"gs://{BUCKET_NAME}/{BUCKET_FOLDER_DIR}",
    "CONTAINER_IMAGE_URI": "asia-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-7:latest",
    # "CONTAINER_IMAGE_URI": "asia.gcr.io/seq-rec-gcp-project-id/tf-serving-scann",
    "ENDPOINT_MACHINE_TYPE": "n1-standard-2",
    "ENDPOINT_MIN_REPLICA_COUNT": "1",
    "ENDPOINT_MAX_REPLICA_COUNT": "1",
    "BUCKET_LOCATION": BUCKET_LOCATION,
    "BUCKET_NAME": BUCKET_NAME,
    "BUCKET_FOLDER_DIR": BUCKET_FOLDER_DIR,
}

# Export every setting so that shell cells can read it too.
os.environ.update(ENDPOINT_VARS)

## Upload model to Vertex AI Models

In [None]:
import google.cloud.aiplatform as aip

In [None]:
aip.init(project=PROJECT_ID, location=ENDPOINT_VARS['ENDPOINT_LOCATION'])

In [None]:
# Ref: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/explainable_ai/sdk_custom_tabular_regression_online_explain.ipynb

model = aip.Model.upload(
    display_name=ENDPOINT_VARS['MODEL_NAME'],
    artifact_uri=ENDPOINT_VARS['PATH_TO_MODEL_ARTIFACT_DIRECTORY'],
    serving_container_image_uri=ENDPOINT_VARS['CONTAINER_IMAGE_URI'],
    sync=False
)

model.wait()

### Deploy the model to endpoint

In [None]:
%%time
# Send all traffic to one deployed model; the key "0" presumably refers to the
# model being deployed by this call — confirm against the Vertex AI docs.
TRAFFIC_SPLIT = {"0": 100}
DEPLOY_GPU = False

# NOTE(review): DEPLOY_GPU is a bool but is passed as `accelerator_type`, which
# looks like it expects an accelerator name (or None). With False and a count
# of 0 this presumably results in a CPU-only deployment; switching DEPLOY_GPU
# to True would pass True rather than a GPU type — confirm before enabling.
endpoint = model.deploy(
    deployed_model_display_name=ENDPOINT_VARS['MODEL_NAME'],
    traffic_split=TRAFFIC_SPLIT,
    machine_type=ENDPOINT_VARS['ENDPOINT_MACHINE_TYPE'],
    accelerator_type=DEPLOY_GPU,
    accelerator_count=0,
    # Replica counts are stored as strings in ENDPOINT_VARS, hence the int() conversion.
    min_replica_count=int(ENDPOINT_VARS['ENDPOINT_MIN_REPLICA_COUNT']),
    max_replica_count=int(ENDPOINT_VARS['ENDPOINT_MAX_REPLICA_COUNT']),
)


# Test the deployed model

In [None]:
%%time
instances = [
    {
        "context_merchants": ["<EXAMPLE_MERCHANT_ID>"],
        "context_search_terms": ["<EXAMPLE_SEARCH_TERM>"],
        "context_merchants_time_recency": ["1"],
        "context_search_terms_time_recency": ["1"]
    }
]
prediction = endpoint.predict(instances=instances)
prediction

In [None]:
prediction.predictions[0]

# Batch predictions with Vertex AI Models

In [None]:
from typing import Union, Sequence
from google.cloud import aiplatform, aiplatform_v1

In [None]:
def create_batch_prediction_job_dedicated_resources(
    project: str,
    location: str,
    model_resource_name: str,
    job_display_name: str,
    gcs_source: Union[str, Sequence[str]],
    gcs_destination: str,
    machine_type: str = "n1-standard-2",
    accelerator_count: int = 1,
    accelerator_type: Union[str, aiplatform_v1.AcceleratorType] = "NVIDIA_TESLA_K80",
    starting_replica_count: int = 1,
    max_replica_count: int = 1,
    sync: bool = True,
):
    """Run a batch prediction job for a Vertex AI model and wait for it.

    Args:
        project: GCP project passed to ``aiplatform.init``.
        location: Region passed to ``aiplatform.init``.
        model_resource_name: Identifier handed to ``aiplatform.Model``.
        job_display_name: Display name of the batch prediction job.
        gcs_source: Input location(s) on GCS.
        gcs_destination: GCS prefix for the job output.
        machine_type: Machine type of the job's replicas.
        accelerator_count: Number of accelerators per replica.
        accelerator_type: Accelerator attached to each replica.
        starting_replica_count: Replica count the job starts with.
        max_replica_count: Upper bound on replicas.
        sync: Forwarded to ``batch_predict``.

    Returns:
        The batch prediction job object. Its display name, resource name and
        state are printed before returning.
    """
    aiplatform.init(project=project, location=location)

    source_model = aiplatform.Model(model_resource_name)
    job = source_model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        machine_type=machine_type,
        accelerator_count=accelerator_count,
        accelerator_type=accelerator_type,
        starting_replica_count=starting_replica_count,
        max_replica_count=max_replica_count,
        sync=sync,
    )

    # Wait explicitly, whatever value `sync` was given.
    job.wait()

    print(job.display_name)
    print(job.resource_name)
    print(job.state)
    return job

## Create source file

In [None]:
%%writefile sample_request_batch.jsonl
{"user_id": "<EXAMPLE_USER_ID>", "merchant_id": "<EXAMPLE_MERCHANT_ID_1>"}
{"user_id": "<EXAMPLE_USER_ID>", "merchant_id": "<EXAMPLE_MERCHANT_ID_2>"}

In [None]:
%%bash

gsutil cp sample_request_batch.jsonl gs://${BUCKET_NAME}/${BUCKET_FOLDER_DIR}

In [None]:
create_batch_prediction_job_dedicated_resources(
    project=PROJECT_ID,
    location=ENDPOINT_VARS['ENDPOINT_LOCATION'],
    model_resource_name=os.environ['MODEL_ID'],
    job_display_name="test-batch-predict-user-ranking",
    gcs_source=f"gs://{BUCKET_NAME}/{BUCKET_FOLDER_DIR}/sample_request_batch.jsonl",
    gcs_destination=f"gs://{BUCKET_NAME}/{BUCKET_FOLDER_DIR}", 
    machine_type="n1-standard-8",
    accelerator_count=0,
    accelerator_type=None,
    starting_replica_count=1,
    max_replica_count=1,
    sync=True
)

### Cancel a running job

In [None]:
def cancel_batch_prediction_job(
    project: str,
    batch_prediction_job_id: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Ask Vertex AI to cancel a batch prediction job and print the response.

    Source: https://cloud.google.com/vertex-ai/docs/samples/aiplatform-cancel-batch-prediction-job-sample#aiplatform_cancel_batch_prediction_job_sample-python

    Args:
        project: GCP project that owns the job.
        batch_prediction_job_id: Id of the job to cancel.
        location: Region of the job.
        api_endpoint: Regional API endpoint; the AI Platform services require
            regional endpoints.
    """
    # One client per call is enough here; it could be reused across requests.
    job_client = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )
    job_name = job_client.batch_prediction_job_path(
        project=project,
        location=location,
        batch_prediction_job=batch_prediction_job_id,
    )
    response = job_client.cancel_batch_prediction_job(name=job_name)
    print("response:", response)

In [None]:
cancel_batch_prediction_job(
    project=PROJECT_ID,
    batch_prediction_job_id="<EXAMPLE_PREDICTION_JOB_ID>",
    location=ENDPOINT_VARS['ENDPOINT_LOCATION'],
    api_endpoint=f"{ENDPOINT_VARS['ENDPOINT_LOCATION']}-aiplatform.googleapis.com"
)

## Undeploy a model

In [None]:
def undeploy_model_in_endpoint(
    end_point: str,
    project: str,
    model_id: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    timeout: int = 7200,
):
    """Undeploy ``model_id`` from the endpoint ``end_point`` and wait for completion.

    The model's deployments are looked up through the model service, and the
    deployment that lives on ``end_point`` is undeployed. Deployments of the
    same model on other endpoints are left alone.

    Args:
        end_point: Id of the endpoint to undeploy from.
        project: GCP project that owns the model and the endpoint.
        model_id: Id of the model to undeploy.
        location: Region of the model and the endpoint.
        api_endpoint: Regional API endpoint; the AI Platform services require
            regional endpoints.
        timeout: Seconds to wait for the undeploy operation to finish.

    Raises:
        IndexError: If the model has no deployment on ``end_point``.
    """
    client_options = {"api_endpoint": api_endpoint}
    endpoint_client = aiplatform.gapic.EndpointServiceClient(client_options=client_options)
    model_client = aiplatform_v1.services.model_service.ModelServiceClient(client_options=client_options)

    # Look up where the model is currently deployed.
    model_name = f'projects/{project}/locations/{location}/models/{model_id}'
    model_info = model_client.get_model(
        request=aiplatform_v1.types.GetModelRequest(name=model_name)
    )

    # Match on the trailing endpoint id only: the resource name reported by
    # the API may spell the project differently (number vs. id) from `project`.
    deployed_model_id = next(
        (
            ref.deployed_model_id
            for ref in model_info.deployed_models
            if ref.endpoint.rsplit('/', 1)[-1] == str(end_point)
        ),
        None,
    )
    if deployed_model_id is None:
        raise IndexError(
            f'model {model_id} has no deployment on endpoint {end_point}'
        )

    endpoint_name = f'projects/{project}/locations/{location}/endpoints/{end_point}'
    undeploy_request = aiplatform_v1.types.UndeployModelRequest(
        endpoint=endpoint_name,
        deployed_model_id=deployed_model_id,
    )
    operation = endpoint_client.undeploy_model(request=undeploy_request)
    # Block until the long-running operation finishes, so that follow-up steps
    # such as deleting the model do not race with it.
    operation.result(timeout=timeout)

In [None]:
os.environ['MODEL_ID']

In [None]:
undeploy_model_in_endpoint(
    end_point=os.environ['ENDPOINT_ID'],
    project=PROJECT_ID,
    model_id="<EXAMPLE_MODEL_ID>",
    location=os.environ['ENDPOINT_LOCATION'],
    api_endpoint=f"{ENDPOINT_VARS['ENDPOINT_LOCATION']}-aiplatform.googleapis.com"
)

## Delete a model

In [None]:
# In some cases the model is orphaned from the endpoint but still cannot be deleted in the UI

aiplatform.init(project=PROJECT_ID, location=os.environ['ENDPOINT_LOCATION'])
my_model = aiplatform.Model("<EXAMPLE_MODEL_ID>")

In [None]:
my_model.delete(sync=True)

# Archive

## Create an endpoint

## Upload model

## Deploy model to endpoint