# Vertex Tabular Binary Classification with .HyperparameterTuningJob()

<center><img src="../images/03.png"/></center>

## Set Constants

In [1]:
# --- Google Cloud project and region ---------------------------------------
PROJECT_ID = "jchavezar-demo"
REGION = "us-central1"

# --- Naming ----------------------------------------------------------------
SERIES = "03cc"
EXPERIMENT_NAME = "ecommerce-exp"

# --- Input data: train / validation / test CSV splits ----------------------
_DATASET_ROOT = "gs://vtx-datasets-public/ecommerce"
TRAIN_DATASET_URI = f"{_DATASET_ROOT}/train.csv"
VAL_DATASET_URI = f"{_DATASET_ROOT}/val.csv"
TEST_DATASET_URI = f"{_DATASET_ROOT}/test.csv"

# --- Output locations ------------------------------------------------------
MODEL_URI = "gs://vtx-models/ecommerce/03cc"
STAGING_URI = "gs://vtx-staging/ecommerce/"

# --- Container images ------------------------------------------------------
TRAIN_IMAGE_URI = f"gcr.io/{PROJECT_ID}/03cc-tf-hpt-xai-train:latest"
PREDICTION_IMAGE_URI = "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-9:latest"

# --- Training settings forwarded to the trainer ----------------------------
EPOCHS = 10
BATCH_SIZE = 100

## Create Folder Structure

```
source
     |  Dockerfile
     |
     └─── trainer
          |  train.py
          |

```

In [2]:
# Start from a clean slate: delete any previous `source` tree, then recreate
# the build context with its `trainer` sub-folder.
!rm -fr source
!mkdir -p source/trainer

## Create Python Source Distribution Files 

In [3]:
%%writefile source/trainer/train.py

import os
import time
import warnings
import argparse
import hypertune
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
warnings.filterwarnings('ignore')

################################### ARGUMENTS #######################################


# Command-line interface of the trainer. The three dataset URIs are mandatory:
# without them the script would only fail later, inside pd.read_csv(None),
# with a far less helpful message. `--lr` is the value being tuned.
parser = argparse.ArgumentParser(description='Train the ecommerce binary classifier')
parser.add_argument('--train_data_uri', required=True, type=str,
                    help='URI of the training CSV')
parser.add_argument('--val_data_uri', required=True, type=str,
                    help='URI of the validation CSV')
parser.add_argument('--test_data_uri', required=True, type=str,
                    help='URI of the test CSV')
parser.add_argument('--epochs', dest='epochs', default=2, type=int, help='Number of Epochs')
parser.add_argument('--batch_size', dest='batch_size', default=32, type=int, help='Batch Size')
parser.add_argument('--lr', dest='learning_rate', required=True, type=float, help='Learning Rate')
args = parser.parse_args()

# Load the three splits into memory as pandas DataFrames.
train_df = pd.read_csv(args.train_data_uri)
val_df = pd.read_csv(args.val_data_uri)
test_df = pd.read_csv(args.test_data_uri)

################################### PREPROCESSING #######################################

# Reference point for the "Total preprocessing time" printed further below.
init_start = time.process_time()


## Convert pandas dataframe to tensor data (from GCS to TF.data.Data)
def df_to_dataset(dataframe, shuffle=None):
    """Turn a pandas DataFrame into a batched ``tf.data.Dataset``.

    The ``will_buy_on_return_visit`` column is split off as the label; every
    remaining column becomes one entry of the feature dict, reshaped to a
    column vector so it matches the ``shape=(1,)`` Keras inputs.

    Args:
        dataframe: DataFrame holding the feature columns and the label column.
        shuffle: When truthy, shuffle the rows with a buffer covering the
            whole frame before batching.

    Returns:
        A dataset of ``(features_dict, labels)`` batches. The batch size is
        read from the module-level ``batch_size`` variable.
    """
    features = dataframe.copy()
    labels = features.pop('will_buy_on_return_visit')
    # Build the dict from the label-free copy so the target never appears among
    # the features. Convert to NumPy first: pandas Series do not support
    # multi-dimensional indexing such as series[:, None].
    features = {
        key: value.to_numpy()[:, tf.newaxis] for key, value in features.items()
    }
    ds = tf.data.Dataset.from_tensor_slices((features, labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(batch_size)
    return ds
      
## Normalization / Standardization
def get_normalization_layer(name, dataset):
    """Create a ``Normalization`` layer adapted to one feature of ``dataset``.

    Args:
        name: Key of the feature inside the dataset's feature dict.
        dataset: Dataset yielding ``(features_dict, label)`` elements.

    Returns:
        The adapted ``layers.Normalization`` instance. The CPU time spent is
        printed as a side effect.
    """
    started = time.process_time()
    normalizer = layers.Normalization(axis=None)
    # Only the requested feature is needed to learn the statistics; the label
    # is dropped.
    single_feature = dataset.map(lambda features, _label: features[name])
    normalizer.adapt(single_feature)
    elapsed = time.process_time() - started
    print(f'Normalization time for {name}: {elapsed}')
    return normalizer

# Performs feature-wise categorical encoding of input features
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
    """Build a lookup + category-encoding pipeline for one feature.

    Args:
        name: Key of the feature inside the dataset's feature dict.
        dataset: Dataset yielding ``(features_dict, label)`` elements.
        dtype: ``'string'`` selects ``StringLookup``; anything else selects
            ``IntegerLookup``.
        max_tokens: Forwarded to the lookup layer as its ``max_tokens``.

    Returns:
        A callable mapping a feature tensor to its encoded form by applying
        the adapted lookup layer followed by ``CategoryEncoding``. The CPU
        time spent is printed as a side effect.
    """
    started = time.process_time()
    lookup_cls = layers.StringLookup if dtype == 'string' else layers.IntegerLookup
    index = lookup_cls(max_tokens=max_tokens)
    # Learn the vocabulary from the requested feature only.
    index.adapt(dataset.map(lambda features, _label: features[name]))
    encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())
    elapsed = time.process_time() - started
    print(f'Encoding time for {name}: {elapsed}')

    def encode(feature):
        return encoder(index(feature))

    return encode

# df_to_dataset reads `batch_size` as a module-level variable, so it must be
# assigned before the first call.
batch_size = args.batch_size
# Shuffle the training split only, so gradient updates do not follow the row
# order of the CSV; validation and test order does not affect their metrics.
train_ds = df_to_dataset(train_df, shuffle=True)
val_ds = df_to_dataset(val_df)
test_ds = df_to_dataset(test_df)

## Identify Numerical and Categorical columns:
# Numeric features: each one is passed through a normalization layer.
num_columns = [
    'latest_ecommerce_progress',
    'time_on_site',
    'pageviews',
]
# String features: each one is passed through a string lookup + encoding.
cat_columns = [
    'source',
    'medium',
    'channelGrouping',
    'deviceCategory',
    'country',
]
# Single integer feature that is encoded as a category (a name, not a list).
num_cat_columns = 'bounces'

# Accumulators: Keras inputs and their preprocessed counterparts, in step.
all_inputs, encoded_features = [], []

# Numerical Features: one scalar input per column, normalized with statistics
# learned from the training split.
for feature_name in num_columns:
    numeric_input = tf.keras.Input(shape=(1,), name=feature_name)
    normalizer = get_normalization_layer(feature_name, train_ds)
    all_inputs.append(numeric_input)
    encoded_features.append(normalizer(numeric_input))
    
# Categorical Features: one string input per column, encoded with a vocabulary
# (capped at 5 tokens) learned from the training split.
for feature_name in cat_columns:
    string_input = tf.keras.Input(shape=(1,), name=feature_name, dtype='string')
    encode = get_category_encoding_layer(
        name=feature_name,
        dataset=train_ds,
        dtype='string',
        max_tokens=5,
    )
    all_inputs.append(string_input)
    encoded_features.append(encode(string_input))

## Integer values into integer indices.
# `bounces` is an integer column treated as a category rather than a quantity.
bounces_input = tf.keras.Input(shape=(1,), name=num_cat_columns, dtype='int64')
encode_bounces = get_category_encoding_layer(
    name=num_cat_columns,
    dataset=train_ds,
    dtype='int64',
    max_tokens=5,
)
all_inputs.append(bounces_input)
encoded_features.append(encode_bounces(bounces_input))

# CPU time elapsed since `init_start` was taken.
total_elapsed = time.process_time() - init_start
print(f'Total preprocessing time: {total_elapsed}')

#########################################################################################


################################### CREATE, COMPILE AND TRAIN MODEL #####################

# Join every preprocessed feature into a single vector and feed it to a small
# dense network.
all_features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(32, activation="relu")(all_features)
x = tf.keras.layers.Dropout(0.5)(x)
# No activation on the last layer: the model outputs a raw logit.
output = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(all_inputs, output)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=args.learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # The generic 'accuracy' string thresholds predictions at 0.5, which is
    # only meaningful for probabilities. Because the model emits logits, the
    # decision boundary is 0 (sigmoid(0) == 0.5). The metric keeps the name
    # 'accuracy' so the history keys 'accuracy' / 'val_accuracy' are unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)
################################## SETUP HYPERPARAMETER METRICS AND TRAIN #####################


# Train with per-epoch validation, then score once on the test split.
history = model.fit(train_ds, validation_data=val_ds, epochs=args.epochs)
test_loss, test_accuracy = model.evaluate(test_ds)
print("acccuracy: ", test_accuracy)

# The tuning objective is the validation accuracy of the last epoch.
final_val_accuracy = history.history['val_accuracy'][-1]

# Publish the objective under the 'accuracy' tag.
reporter = hypertune.HyperTune()
reporter.report_hyperparameter_tuning_metric(
    hyperparameter_metric_tag='accuracy',
    metric_value=final_val_accuracy,
    global_step=args.epochs,
)


################################### SAVE MODEL ##########################################

# The export location comes from the AIP_MODEL_DIR environment variable; a
# missing variable raises KeyError.
export_dir = os.environ['AIP_MODEL_DIR']
model.save(export_dir)

Writing source/trainer/train.py


In [4]:
%%writefile source/Dockerfile

# Training image for the trainer script.
FROM python:3.9.12

# Install dependencies before copying the source so this layer can be reused
# when only the code changes. A single `pip install` resolves all packages
# together; --no-cache-dir keeps pip's download cache out of the image.
RUN pip install --no-cache-dir \
        tensorflow \
        google-cloud-storage \
        pandas \
        gcsfs \
        cloudml-hypertune

# Copy the build context to the image root; the ENTRYPOINT path below is
# relative to it.
COPY . /

ENTRYPOINT ["python", "trainer/train.py"]

Writing source/Dockerfile


In [5]:
# Build the `source` directory with Cloud Build and tag the resulting image
# with TRAIN_IMAGE_URI.
!gcloud builds submit -t $TRAIN_IMAGE_URI source/.

Creating temporary tarball archive of 2 file(s) totalling 5.7 KiB before compression.
Uploading tarball of [source/.] to [gs://jchavezar-demo_cloudbuild/source/1669855481.213354-2a89d9f00b36411aaae66173d1419d0e.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/277506c7-e8bb-4c24-b11e-f97190d368e5].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/277506c7-e8bb-4c24-b11e-f97190d368e5?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "277506c7-e8bb-4c24-b11e-f97190d368e5"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1669855481.213354-2a89d9f00b36411aaae66173d1419d0e.tgz#1669855481459554
Copying gs://jchavezar-demo_cloudbuild/source/1669855481.213354-2a89d9f00b36411aaae66173d1419d0e.tgz#1669855481459554...
/ [1 files][  2.2 KiB/  2.2 KiB]                                                
Operation completed over 1 ob

## Create Vertex Tensorboard

In [6]:
from google.cloud import aiplatform as aip

# Bind the SDK to the project and region declared in the constants cell
# instead of relying on whatever defaults the environment provides.
aip.init(project=PROJECT_ID, location=REGION)

# Reuse the Tensorboard instance labelled with this series when one exists;
# otherwise create it.
existing_tensorboards = aip.Tensorboard.list(filter=f"labels.series={SERIES}")
if existing_tensorboards:
    tb = existing_tensorboards[0]
else:
    tb = aip.Tensorboard.create(display_name=SERIES, labels={'series': SERIES})

## Create Vertex Training from Code [HyperparameterTuningJob]

In [7]:
# Flags forwarded to trainer/train.py. `--lr` is intentionally absent: it is
# the tuned parameter declared in the parameter spec.
CMDARGS = [
    f"--train_data_uri={TRAIN_DATASET_URI}",
    f"--val_data_uri={VAL_DATASET_URI}",
    f"--test_data_uri={TEST_DATASET_URI}",
    f"--epochs={EPOCHS}",
    f"--batch_size={BATCH_SIZE}",
]

# CPU-only machine for each replica.
MACHINE_SPEC = dict(machine_type="n1-standard-4", accelerator_count=0)

# A single worker pool with one replica running the training container. The
# empty "command" leaves the image's ENTRYPOINT in charge.
WORKER_POOL_SPEC = [
    dict(
        replica_count=1,
        machine_spec=MACHINE_SPEC,
        container_spec=dict(image_uri=TRAIN_IMAGE_URI, command=[], args=CMDARGS),
    ),
]

In [8]:
# Job definition handed to the hyperparameter tuning job below as its
# `custom_job`.
customJob = aip.CustomJob(
    display_name='03cc-tfkeras-hpt-customjob',
    worker_pool_specs=WORKER_POOL_SPEC,
    base_output_dir=MODEL_URI,
    staging_bucket=STAGING_URI,
)

In [9]:
# Objective: the key matches the `hyperparameter_metric_tag` that train.py
# reports ('accuracy').
METRIC_SPEC = dict(accuracy="maximize")

# Search space: the key matches the trainer's `--lr` flag; values are sampled
# between 0.001 and 0.1 on a log scale.
PARAMETER_SPEC = dict(
    lr=aip.hyperparameter_tuning.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
)

In [10]:
# Up to 18 trials in total, at most 3 running at the same time.
# NOTE(review): search_algorithm=None presumably selects the service default
# algorithm -- confirm against the SDK reference.
tuningJob = aip.HyperparameterTuningJob(
    display_name='03cc-tfkeras-hpt-customjob',
    custom_job=customJob,
    metric_spec=METRIC_SPEC,
    parameter_spec=PARAMETER_SPEC,
    max_trial_count=18,
    parallel_trial_count=3,
    search_algorithm=None,
)

In [None]:
# Launch the tuning job under a dedicated service account and attach the
# Tensorboard instance selected earlier.
TUNING_SERVICE_ACCOUNT = 'vtx-pipe@jchavezar-demo.iam.gserviceaccount.com'
tuningJob.run(service_account=TUNING_SERVICE_ACCOUNT, tensorboard=tb.resource_name)

Creating HyperparameterTuningJob
HyperparameterTuningJob created. Resource name: projects/569083142710/locations/us-central1/hyperparameterTuningJobs/4178249876104019968
To use this HyperparameterTuningJob in another session:
hpt_job = aiplatform.HyperparameterTuningJob.get('projects/569083142710/locations/us-central1/hyperparameterTuningJobs/4178249876104019968')
View HyperparameterTuningJob:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/4178249876104019968?project=569083142710
View Tensorboard:
https://us-central1.tensorboard.googleusercontent.com/experiment/projects+569083142710+locations+us-central1+tensorboards+2851139203939434496+experiments+4178249876104019968
HyperparameterTuningJob projects/569083142710/locations/us-central1/hyperparameterTuningJobs/4178249876104019968 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJob projects/569083142710/locations/us-central1/hyperparameterTuningJobs/4178249876104019968 current state:
JobState.JO

## Get Model Information from TensorFlow Graph

In [None]:
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')

# Each tuning trial exports its model under <base_output_dir>/<trial id>/model,
# so there is no single "<MODEL_URI>/model". Pick the trial with the highest
# reported accuracy (the metric is maximized) and load that one. Trials with
# no final measurement (e.g. failed ones) are skipped.
completed_trials = [
    trial for trial in tuningJob.trials if trial.final_measurement.metrics
]
best_trial = max(
    completed_trials,
    key=lambda trial: trial.final_measurement.metrics[0].value,
)
BEST_MODEL_URI = f"{MODEL_URI}/{best_trial.id}/model"

loaded_model = tf.keras.models.load_model(BEST_MODEL_URI)
tf.keras.utils.plot_model(loaded_model, show_shapes=True, rankdir="LR")

## Build Explainable AI Metadata from Graph

In [None]:
# Print the inputs and outputs of the default serving signature, separated by
# a blank line.
infer = loaded_model.signatures['serving_default']
print(infer.inputs, end='\n\n')
print(infer.structured_outputs)

In [None]:
# Model inputs to attribute, in the order they are listed in the metadata.
_EXPLAINED_FEATURES = [
    "latest_ecommerce_progress",
    "bounces",
    "time_on_site",
    "pageviews",
    "source",
    "medium",
    "channelGrouping",
    "deviceCategory",
    "country",
]

# NOTE(review): "dense_1" is presumably the output name shown by the serving
# signature printed in the previous cell -- confirm before uploading.
EXPLANATION_METADATA = {
    "outputs": {"logit": {"output_tensor_name": "dense_1"}},
    "inputs": {feature: {} for feature in _EXPLAINED_FEATURES},
}

# Sampled Shapley attribution with 5 paths.
EXPLANATION_PARAMS = {"sampled_shapley_attribution": {"path_count": 5}}

## Upload Model

In [None]:
# The artifact URI must be the directory that holds the SavedModel. Tuning
# trials export to <base_output_dir>/<trial id>/model, so select the trial
# with the highest reported accuracy and upload its export directory. Trials
# with no final measurement (e.g. failed ones) are skipped.
completed_trials = [
    trial for trial in tuningJob.trials if trial.final_measurement.metrics
]
best_trial = max(
    completed_trials,
    key=lambda trial: trial.final_measurement.metrics[0].value,
)

model = aip.Model.upload(
    display_name='ecommerce_tf',
    serving_container_image_uri=PREDICTION_IMAGE_URI,
    artifact_uri=f'{MODEL_URI}/{best_trial.id}/model',
    explanation_parameters=EXPLANATION_PARAMS,
    explanation_metadata=EXPLANATION_METADATA,
)

## Deploy Model On Endpoint

In [None]:
# Deployment settings: a single fixed-size replica receiving all traffic, with
# the same explanation configuration that was used at upload time.
DEPLOY_OPTIONS = dict(
    deployed_model_display_name='ecommerce_tf_ep_dep',
    traffic_percentage=100,
    machine_type='n1-standard-4',
    min_replica_count=1,
    max_replica_count=1,
    explanation_metadata=EXPLANATION_METADATA,
    explanation_parameters=EXPLANATION_PARAMS,
)
endpoint = model.deploy(**DEPLOY_OPTIONS)

## Testing Predictions

In [None]:
# One sample visitor; every feature is wrapped in a one-element list to match
# the model's shape-(1,) inputs.
instance = dict(
    latest_ecommerce_progress=[0],
    bounces=[0],
    time_on_site=[103],
    pageviews=[3],
    source=['youtube.com'],
    medium=['referral'],
    channelGrouping=['Social'],
    deviceCategory=['desktop'],
    country=['Vietnam'],
)

In [None]:
# Request a prediction for the sample instance; the returned value is shown as
# the cell output.
endpoint.predict([instance])

In [None]:
# Same request as the prediction above, but through the explanation API so
# the response also carries feature attributions.
explanations = endpoint.explain(instances=[instance])
print("Explainable predictions:", explanations)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Address the response by field name rather than by tuple position, so the
# cell does not depend on how many fields the SDK's result tuple has.
first_explanation = explanations.explanations[0]
results = dict(first_explanation.attributions[0].feature_attributions.items())
names = list(results.keys())
# Read the last element of each attribution without removing it: popping would
# mutate the response and make the cell fail when it is run a second time.
values = [attribution[-1] for attribution in results.values()]

# Horizontal bar per feature, labelled with the feature name.
plt.barh(range(len(results)), values, tick_label=names)

## Cleaning

In [None]:
# Remove local build artifacts: any leftover source archives and the `source`
# build context created at the top of the notebook.
!rm -f source.tar source.tar.gz
!rm -fr source