# Vertex AI Python client library for training and deployment of a tabular classification model for online prediction.

## Installation

In [1]:
# Set up the pip flag used by the install cells.
import os

# This marker file is used here to detect a Google Cloud Notebook instance.
IS_GOOGLE_CLOUD_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# Pass --user to pip only when the marker file is present.
USER_FLAG = "--user" if IS_GOOGLE_CLOUD_NOTEBOOK else ""

In [2]:
# Install the latest version of the Vertex AI client library
! pip install {USER_FLAG} --upgrade google-cloud-aiplatform

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.3.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 7.6 MB/s eta 0:00:01
Installing collected packages: google-cloud-aiplatform
Successfully installed google-cloud-aiplatform-1.3.0


In [3]:
# Install the Cloud Storage library
! pip install {USER_FLAG} --upgrade google-cloud-storage

Collecting google-cloud-storage
  Downloading google_cloud_storage-1.42.0-py2.py3-none-any.whl (105 kB)
[K     |████████████████████████████████| 105 kB 8.1 MB/s eta 0:00:01
Installing collected packages: google-cloud-storage
Successfully installed google-cloud-storage-1.42.0


In [4]:
# Restart the kernel (skipped when the IS_TESTING environment variable is set).
import os

running_tests = bool(os.getenv("IS_TESTING"))

if not running_tests:
    import IPython

    kernel = IPython.Application.instance().kernel
    # The True argument requests a restart rather than a plain shutdown.
    kernel.do_shutdown(True)

In [None]:
# Set project ID
import os

PROJECT_ID = ""

if not os.getenv("IS_TESTING"):
    shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID: ", PROJECT_ID)

In [3]:
# Build a timestamp (YYYYMMDDHHMMSS) that can be used to keep resource names unique.
from datetime import datetime

TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

In [4]:
# Cloud Storage bucket and region used by the rest of the notebook.
# Fill in your own values; the fallbacks below only apply when they are left blank.
BUCKET_NAME = "gs://"  # e.g. "gs://my-unique-bucket"
REGION = ""  # e.g. "us-central1"

# With the placeholders untouched, `gsutil mb` and `aiplatform.init` fail.
# Derive a unique bucket name from the project and timestamp set in earlier cells.
if BUCKET_NAME in ("", "gs://") and PROJECT_ID:
    BUCKET_NAME = f"gs://{PROJECT_ID}-aip-{TIMESTAMP}"

# us-central1 is the region that appears in the recorded outputs of this notebook.
if not REGION:
    REGION = "us-central1"

In [None]:
! gsutil mb -l $REGION $BUCKET_NAME

In [7]:
# validating access to the Cloud Storage bucket by examining its contents
! gsutil ls -al $BUCKET_NAME

In [None]:
# Copy dataset into the Cloud Storage bucket
IMPORT_FILE = "petfinder-tabular-classification_toy.csv"
! gsutil cp gs://cloud-training/mlongcp/v3.0_MLonGC/pdtrust_toy_datasets/{IMPORT_FILE} {BUCKET_NAME}/data/

gcs_source = f"{BUCKET_NAME}/data/{IMPORT_FILE}"

In [9]:
# Import the Vertex AI SDK for Python.
import os

from google.cloud import aiplatform

# Set the default project and location for the SDK calls that follow.
aiplatform.init(location=REGION, project=PROJECT_ID)

## Creating an AutoML Tabular model

In [10]:
# Create a managed tabular dataset from the CSV staged in Cloud Storage.
dataset = aiplatform.TabularDataset.create(
    gcs_source=gcs_source,
    display_name="petfinder-tabular-dataset",
)
# Short alias used by the training cell.
ds = dataset

# Display the dataset's resource name as the cell output.
ds.resource_name

INFO:google.cloud.aiplatform.datasets.dataset:Creating TabularDataset
INFO:google.cloud.aiplatform.datasets.dataset:Create TabularDataset backing LRO: projects/1075205415941/locations/us-central1/datasets/1945247175768276992/operations/1110822578768838656
INFO:google.cloud.aiplatform.datasets.dataset:TabularDataset created. Resource name: projects/1075205415941/locations/us-central1/datasets/1945247175768276992
INFO:google.cloud.aiplatform.datasets.dataset:To use this TabularDataset in another session:
INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.TabularDataset('projects/1075205415941/locations/us-central1/datasets/1945247175768276992')


'projects/1075205415941/locations/us-central1/datasets/1945247175768276992'

In [11]:
# Launch a training job to create a model.

# Construct an AutoML Tabular training job for classification.
# Each entry in column_transformations declares how one input column is treated
# (categorical or numeric).
job = aiplatform.AutoMLTabularTrainingJob(
    display_name="train-petfinder-automl-1",
    optimization_prediction_type="classification",
    column_transformations=[
        {"categorical": {"column_name": "Type"}},
        {"numeric": {"column_name": "Age"}},
        {"categorical": {"column_name": "Breed1"}},
        {"categorical": {"column_name": "Color1"}},
        {"categorical": {"column_name": "Color2"}},
        {"categorical": {"column_name": "MaturitySize"}},
        {"categorical": {"column_name": "FurLength"}},
        {"categorical": {"column_name": "Vaccinated"}},
        {"categorical": {"column_name": "Sterilized"}},
        {"categorical": {"column_name": "Health"}},
        {"numeric": {"column_name": "Fee"}},
        {"numeric": {"column_name": "PhotoAmt"}},
    ],
)

# Run the job on the managed dataset to train the model; "Adopted" is the label column.
# The recorded logs show the call polling the pipeline state while training runs.
model = job.run(
    dataset=ds,
    target_column="Adopted",
    # Define training, validation and test fraction for training
    training_fraction_split=0.8,
    validation_fraction_split=0.1,
    test_fraction_split=0.1,
    model_display_name="adopted-prediction-model",
    disable_early_stopping=False,
)

  and should_run_async(code)
  app.launch_new_instance()


INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/1715908841423503360?project=1075205415941
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/1075205415941/locations/us-central1/trainingPipelines/1715908841423503360 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/1075205415941/locations/us-central1/trainingPipelines/1715908841423503360 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/1075205415941/locations/us-central1/trainingPipelines/1715908841423503360 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/1075205415941/locations/us-central1/trainingPipelines/1715908841423503360 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.

In [12]:
# Deploy the model resource to a serving endpoint resource.
# No endpoint is passed in, so deploy() creates one (see "Creating Endpoint" in the
# logs) and returns it.
endpoint = model.deploy(
    deployed_model_display_name="model-endpoint",
    # Key "0" refers to the model being deployed by this call; it gets all traffic.
    traffic_split={"0": 100},
    machine_type="n1-standard-4",
    accelerator_count=0,
    # A fixed single replica: no autoscaling.
    min_replica_count=1,
    max_replica_count=1,
)


  and should_run_async(code)


INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/1075205415941/locations/us-central1/endpoints/7467372802459303936/operations/7965582686603444224
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/1075205415941/locations/us-central1/endpoints/7467372802459303936
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/1075205415941/locations/us-central1/endpoints/7467372802459303936')
INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/1075205415941/locations/us-central1/endpoints/7467372802459303936
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/1075205415941/locations/us-central1/endpoints/7467372802459303936/operations/2903536705439006720
INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/107520

In [13]:
# Make an online prediction for one sample instance.
# Every feature value, including the numeric ones, is passed as a string.
# NOTE(review): "Gender" is sent here but has no entry in the training job's
# column_transformations; confirm that this is intended.
prediction = endpoint.predict(
    [
        {
            "Type": "Cat",
            "Age": "3",
            "Breed1": "Tabby",
            "Gender": "Male",
            "Color1": "Black",
            "Color2": "White",
            "MaturitySize": "Small",
            "FurLength": "Short",
            "Vaccinated": "No",
            "Sterilized": "No",
            "Health": "Healthy",
            "Fee": "100",
            "PhotoAmt": "2",
        }
    ]
)

print(prediction)

  and should_run_async(code)


Prediction(predictions=[{'classes': ['Yes', 'No'], 'scores': [0.527707576751709, 0.4722923934459686]}], deployed_model_id='3521401492231684096', explanations=None)


In [14]:
# Undeploy every model from the endpoint before the cleanup cell deletes resources.
endpoint.undeploy_all()

INFO:google.cloud.aiplatform.models:Undeploying Endpoint model: projects/1075205415941/locations/us-central1/endpoints/7467372802459303936


  and should_run_async(code)


INFO:google.cloud.aiplatform.models:Undeploy Endpoint model backing LRO: projects/1075205415941/locations/us-central1/endpoints/7467372802459303936/operations/1845190793006940160
INFO:google.cloud.aiplatform.models:Endpoint model undeployed. Resource name: projects/1075205415941/locations/us-central1/endpoints/7467372802459303936


In [15]:
# Cleaning up

delete_training_job = True
delete_model = True
delete_endpoint = True

delete_bucket = False

job.delete()

model.delete()

endpoint.delete()

if delete_bucket and "BUCKET_NAME" in globals():
    ! gsutil -m rm -r $BUCKET_NAME

INFO:google.cloud.aiplatform.base:Deleting AutoMLTabularTrainingJob : projects/1075205415941/locations/us-central1/trainingPipelines/1715908841423503360
INFO:google.cloud.aiplatform.base:Delete AutoMLTabularTrainingJob  backing LRO: projects/1075205415941/locations/us-central1/operations/5317466105709592576
INFO:google.cloud.aiplatform.base:AutoMLTabularTrainingJob deleted. . Resource name: projects/1075205415941/locations/us-central1/trainingPipelines/1715908841423503360
INFO:google.cloud.aiplatform.base:Deleting Model : projects/1075205415941/locations/us-central1/models/3676687718445744128


  and should_run_async(code)


INFO:google.cloud.aiplatform.base:Delete Model  backing LRO: projects/1075205415941/locations/us-central1/operations/8046647479896113152
INFO:google.cloud.aiplatform.base:Model deleted. . Resource name: projects/1075205415941/locations/us-central1/models/3676687718445744128
INFO:google.cloud.aiplatform.base:Deleting Endpoint : projects/1075205415941/locations/us-central1/endpoints/7467372802459303936
INFO:google.cloud.aiplatform.base:Delete Endpoint  backing LRO: projects/1075205415941/locations/us-central1/operations/6456876811434328064
INFO:google.cloud.aiplatform.base:Endpoint deleted. . Resource name: projects/1075205415941/locations/us-central1/endpoints/7467372802459303936
