In [2]:
from datetime import datetime
import google.cloud.aiplatform as aip

In [6]:
# --- Project and storage ----------------------------------------------------
PROJECT_ID = "felipe-sandbox"
REGION = "us-central1"
BUCKET_NAME = "felipe-sandbox-bucket"  # @param {type:"string"}
BUCKET_URI = f"gs://{BUCKET_NAME}"
# Per-run suffix (YYYYmmddHHMMSS) used in display names and in the output folder.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# --- Container images -------------------------------------------------------
# The image host prefix is the first dash-separated component of REGION
# ("us" for "us-central1").
TRAIN_VERSION = "scikit-learn-cpu.0-23"
TRAIN_IMAGE = "{}-docker.pkg.dev/vertex-ai/training/{}:latest".format(
    REGION.split("-")[0], TRAIN_VERSION
)
# Same scikit-learn release as the training image. The previous placeholder
# value (1) produced the unusable image name ".../prediction/1:latest".
DEPLOY_VERSION = "sklearn-cpu.0-23"
DEPLOY_IMAGE = "{}-docker.pkg.dev/vertex-ai/prediction/{}:latest".format(
    REGION.split("-")[0], DEPLOY_VERSION
)
# Output folder of this run inside the bucket.
MODEL_DIR = "{}/{}".format(BUCKET_URI, TIMESTAMP)

# --- Training script arguments ---------------------------------------------
# DIRECT=True passes the output folder on the command line; otherwise the
# script falls back to its AIP_MODEL_DIR default.
DIRECT = False
if DIRECT:
    # The flag must be spelled exactly as trainer/task.py declares it
    # ("--model-dir"); argparse rejects the underscore spelling.
    CMDARGS = ["--model-dir=" + MODEL_DIR]
else:
    CMDARGS = []

# --- Compute ----------------------------------------------------------------
TRAIN_GPU = 0
MACHINE_TYPE = "n1-standard"
VCPU = 2
TRAIN_COMPUTE = MACHINE_TYPE + "-" + str(VCPU)
DEPLOY_GPU = 0

In [35]:
print(TRAIN_IMAGE)


us-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.0-23:latest


In [2]:
# Build the source layout of the training package from scratch:
#   custom/           setup.py, setup.cfg, PKG-INFO, README.md
#   custom/trainer/   Python package that will hold the training script (task.py)
# The folder is removed first so files from an earlier run are not packaged again.
! rm -rf custom
! mkdir custom

# Add package information (README.md is created empty)
! touch custom/README.md

# Each file body is first built as a Python string; IPython expands "$name"
# in the shell line that follows, and echo writes the text to the target file.
setup_cfg = "[egg_info]\n\ntag_build =\n\ntag_date = 0"
! echo "$setup_cfg" > custom/setup.cfg

# setup.py: declares 'wget' and 'cloudml-hypertune' as install requirements and
# packages every Python package found under custom/ (i.e. trainer).
# NOTE(review): trainer/task.py as written in this notebook imports neither of
# these two requirements — confirm whether they are still needed.
setup_py = "import setuptools\n\nsetuptools.setup(\n\n    install_requires=[\n\n        'wget',\n\n        'cloudml-hypertune',\n\n    ],\n\n    packages=setuptools.find_packages())"
! echo "$setup_py" > custom/setup.py

# PKG-INFO: static package metadata.
# NOTE(review): the Name field says "CIFAR10" although the training script is
# for census income, and "Demostration" is misspelled — both look like
# leftovers from a template; confirm before fixing the literal.
pkg_info = "Metadata-Version: 1.0\n\nName: CIFAR10 \n\nVersion: 0.0.0\n\nSummary: Demostration training script\n\nHome-page: www.google.com\n\nAuthor: Google\n\nAuthor-email: aferlitsch@google.com\n\nLicense: Public\n\nDescription: Demo\n\nPlatform: Vertex"
! echo "$pkg_info" > custom/PKG-INFO

# Make the training subfolder; the empty __init__.py marks it as a package
! mkdir custom/trainer
! touch custom/trainer/__init__.py

In [3]:
%%writefile custom/trainer/task.py
# Single Instance Training for Census Income

from sklearn.ensemble import RandomForestRegressor
import joblib
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelBinarizer
import datetime
import pandas as pd

from google.cloud import storage

import numpy as np
import argparse
import os
import sys

# Command line: the model output folder. Both spellings of the flag are
# accepted because callers have used "--model-dir" as well as "--model_dir".
# When the flag is absent the AIP_MODEL_DIR environment variable is used.
parser = argparse.ArgumentParser()
parser.add_argument('--model-dir', '--model_dir', dest='model_dir',
                    default=os.getenv('AIP_MODEL_DIR'), type=str, help='Model dir.')
args = parser.parse_args()

# Fail fast: without an output folder the script would download the data and
# train the model, only to crash when it tries to split the output path.
if not args.model_dir:
    parser.error('no model directory: pass --model-dir or set AIP_MODEL_DIR')

print('Python Version = {}'.format(sys.version))

# Public bucket holding the census data
bucket = storage.Client().bucket('cloud-samples-data')

# Path to the data inside the public bucket
blob = bucket.blob('ai-platform/sklearn/census_data/adult.data')
# Download the data to the working directory
blob.download_to_filename('adult.data')

# Column layout of the census file, in file order. Every column is named,
# including the ones the model does not use; the last one is the label.
COLUMNS = (
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
    'income-level',
)

# Columns holding text categories; these must be turned into numbers before
# scikit-learn can use them.
CATEGORICAL_COLUMNS = (
    'workclass', 'education', 'marital-status', 'occupation',
    'relationship', 'race', 'sex', 'native-country',
)

# Read the downloaded file; it has no header row, so names come from COLUMNS.
with open('./adult.data', 'r') as census_file:
    raw_training_data = pd.read_csv(census_file, names=COLUMNS, header=None)

# Features: every column except the label, as a list of rows (list of lists).
feature_frame = raw_training_data.drop(columns=['income-level'])
train_features = feature_frame.values.tolist()

# Labels: one boolean per row, True where the income level equals ' >50K'.
# NOTE(review): the literal starts with a space — presumably because values in
# the file follow ", " separators; confirm against the data.
is_high_income = raw_training_data['income-level'] == ' >50K'
train_labels = is_high_income.values.tolist()

# Since the census data set has categorical features, we need to convert
# them to numerical values. We'll use a list of pipelines to convert each
# categorical column and then use FeatureUnion to combine them before calling
# the RandomForestRegressor.
categorical_pipelines = []

# Each categorical column needs to be extracted individually and converted to a numerical value.
# To do this, each categorical column will use a pipeline that extracts one feature column via
# SelectKBest(k=1) and a LabelBinarizer() to convert the categorical value to a numerical one.
# A scores array (created below) will select and extract the feature column. The scores array is
# created by iterating over the COLUMNS and checking if it is a CATEGORICAL_COLUMN.
# Note that SelectKBest is never fitted here: its scores_ attribute is assigned by hand so that
# transform() picks exactly the column we point it at.
for i, col in enumerate(COLUMNS[:-1]):
    if col in CATEGORICAL_COLUMNS:
        # Create a scores array to get the individual categorical column.
        # Example:
        #  data = [39, 'State-gov', 77516, 'Bachelors', 13, 'Never-married', 'Adm-clerical',
        #         'Not-in-family', 'White', 'Male', 2174, 0, 40, 'United-States']
        #  scores = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        #
        # Returns: [['State-gov']]
        # Build the scores array (one entry per feature column; the label column is excluded).
        scores = [0] * len(COLUMNS[:-1])
        # This column is the categorical column we want to extract.
        scores[i] = 1
        skb = SelectKBest(k=1)
        skb.scores_ = scores
        # Convert the categorical column to a numerical value:
        # extract the column from the training rows and fit the binarizer on it.
        lbn = LabelBinarizer()
        r = skb.transform(train_features)
        lbn.fit(r)
        # Create the pipeline to extract the categorical feature
        categorical_pipelines.append(
            ('categorical-{}'.format(i), Pipeline([
                ('SKB-{}'.format(i), skb),
                ('LBN-{}'.format(i), lbn)])))

# Create pipeline to extract the numerical features
# (k=6 matches the six 1-entries in the scores array below).
skb = SelectKBest(k=6)
# From COLUMNS use the features that are numerical: the 1-entries sit at the positions of
# age, fnlwgt, education-num, capital-gain, capital-loss and hours-per-week.
skb.scores_ = [1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0]
# Despite the list's name, this last entry is the numerical selector, not a categorical one.
categorical_pipelines.append(('numerical', skb))

# Combine all the features using FeatureUnion
preprocess = FeatureUnion(categorical_pipelines)

# Create the regressor
regressor = RandomForestRegressor()

# Transform the features and fit the regressor on them (the labels are booleans)
regressor.fit(preprocess.transform(train_features), train_labels)

# Create the overall model as a single pipeline.
# NOTE(review): the final step is named 'classifier' but holds the RandomForestRegressor.
pipeline = Pipeline([
    ('union', preprocess),
    ('classifier', regressor)
])

# Split the model directory into bucket name and object prefix.
# The layout is positional, as before: '<scheme>://<bucket>/<path...>' puts the
# bucket at index 2 of the '/'-split and the path components after it.
if not args.model_dir:
    raise ValueError('No model directory given (--model-dir / AIP_MODEL_DIR).')
components = args.model_dir.split('/')
if len(components) < 3 or not components[2]:
    raise ValueError(
        'Cannot find a bucket name in model directory {!r}'.format(args.model_dir))
bucket_name = components[2]
# Drop empty components: a trailing (or doubled) slash in the model directory
# would otherwise yield an object name containing '//', and a directory that is
# just the bucket would raise IndexError.
subdirs = [comp for comp in components[3:] if comp]
# Object names in GCS always use '/', independent of the local OS separator.
subdir = '/'.join(subdirs)

# Write model to a local file
joblib.dump(pipeline, 'model.joblib')

# Upload the model to GCS (at the bucket root when there is no prefix)
bucket = storage.Client().bucket(bucket_name)
blob_name = subdir + '/model.joblib' if subdir else 'model.joblib'
blob = bucket.blob(blob_name)
blob.upload_from_filename('model.joblib')

Writing custom/trainer/task.py


In [4]:
# Package the training code and stage it in the bucket.
# Remove archives left over from an earlier run of this cell.
! rm -f custom.tar custom.tar.gz
# Archive the custom/ folder (verbose: lists every file added) ...
! tar cvf custom.tar custom
# ... and compress it to custom.tar.gz.
! gzip custom.tar
# Copy the archive to the bucket; IPython expands $BUCKET_URI from the notebook variable.
# NOTE(review): the object name says "cifar10" although the package trains on
# census data; renaming it requires the same change in python_package_gcs_uri.
! gsutil cp custom.tar.gz $BUCKET_URI/trainer_cifar10.tar.gz

custom/
custom/trainer/
custom/trainer/task.py
custom/trainer/__init__.py
custom/PKG-INFO
custom/setup.cfg
custom/setup.py
custom/README.md
Copying file://custom.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  2.5 KiB/  2.5 KiB]                                                
Operation completed over 1 objects/2.5 KiB.                                      


In [3]:
# Display name of the training job; the timestamp keeps it unique per run.
DISPLAY_NAME = "cifar10_" + TIMESTAMP

# Training job that runs trainer.task from the uploaded package.
# - The training image comes from the config cell (TRAIN_IMAGE) instead of a
#   second hard-coded copy of the same URI.
# - The serving image is pinned to the scikit-learn release used for training
#   (0.23): the model is a joblib pickle, and loading a pickle with a different
#   scikit-learn version than the one that wrote it is not supported.
# - The location is passed explicitly so REGION is the single source of truth.
job = aip.CustomPythonPackageTrainingJob(
    display_name=DISPLAY_NAME,
    python_package_gcs_uri=f"{BUCKET_URI}/trainer_cifar10.tar.gz",
    python_module_name="trainer.task",
    container_uri=TRAIN_IMAGE,
    model_serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-23:latest",
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
)



In [4]:

# Settings of the training run, gathered in one place before the call.
run_settings = dict(
    model_display_name="cifar10_" + TIMESTAMP,
    args=CMDARGS,
    replica_count=1,
    machine_type="n1-standard-4",
    base_output_dir=MODEL_DIR,
    sync=False,
)
model = job.run(**run_settings)

model_path_to_deploy = MODEL_DIR

# The run was started with sync=False, so block here until it has finished.
model.wait()

INFO:google.cloud.aiplatform.training_jobs:Training Output directory:
gs://felipe-sandbox-bucket/20220601170612 
INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/7415682012058484736?project=581970904807
INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/581970904807/locations/us-central1/trainingPipelines/7415682012058484736 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:View backing custom job:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/3010431500769296384?project=581970904807
INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/581970904807/locations/us-central1/trainingPipelines/7415682012058484736 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/581970904807/locations/us-ce

In [5]:
!gsutil ls gs://felipe-sandbox-bucket/20220601163953 

CommandException: One or more URLs matched no objects.


In [6]:
# Look up the training job(s) registered under this run's display name.
name_filter = f"display_name={DISPLAY_NAME}"
_job = job.list(filter=name_filter)
print(_job)

E0601 17:07:38.407460554       1 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E0601 17:07:40.002344032       1 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


[<google.cloud.aiplatform.training_jobs.CustomPythonPackageTrainingJob object at 0x7f6c40293e90> 
resource name: projects/581970904807/locations/us-central1/trainingPipelines/7415682012058484736]


INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/581970904807/locations/us-central1/trainingPipelines/7415682012058484736 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/581970904807/locations/us-central1/trainingPipelines/7415682012058484736 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob run completed. Resource name: projects/581970904807/locations/us-central1/trainingPipelines/7415682012058484736
INFO:google.cloud.aiplatform.training_jobs:Model available at projects/581970904807/locations/us-central1/models/5500148588275040256


In [4]:
XAI = "shapley"  # [ shapley, ig, xrai ]

# Attribution settings for each supported explanation method.
XAI_PARAMETERS = {
    "shapley": {"sampled_shapley_attribution": {"path_count": 10}},
    "ig": {"integrated_gradients_attribution": {"step_count": 50}},
    "xrai": {"xrai_attribution": {"step_count": 50}},
}
# An unknown method used to leave PARAMETERS undefined and fail later with a
# NameError; report the actual problem instead.
if XAI not in XAI_PARAMETERS:
    raise ValueError(
        "Unknown XAI method {!r}; expected one of {}".format(XAI, sorted(XAI_PARAMETERS))
    )
PARAMETERS = XAI_PARAMETERS[XAI]

parameters = aip.explain.ExplanationParameters(PARAMETERS)

# Feature names, in the order the values appear in a prediction instance.
COLUMNS = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
]
# One input named "features" whose positions map to COLUMNS, and one output
# named "income".
metadata = aip.explain.ExplanationMetadata(
    inputs={
        "features": {"index_feature_mapping": COLUMNS, "encoding": "BAG_OF_FEATURES"}
    },
    outputs={"income": {}},
)

# Point MODEL_DIR at the "model" subfolder of the run's output folder
# (presumably where the training job saved model.joblib — confirm with
# `gsutil ls`). Guarded so that re-running this cell does not append "/model"
# a second time.
if not MODEL_DIR.endswith("/model"):
    MODEL_DIR = MODEL_DIR + "/model"

# Serving image for the uploaded model.
DEPLOY_VERSION = "sklearn-cpu.0-23"
DEPLOY_IMAGE = "{}-docker.pkg.dev/vertex-ai/prediction/{}:latest".format(
    REGION.split("-")[0], DEPLOY_VERSION
)

In [None]:
# Register the trained artifact as a model, together with the explanation
# configuration prepared above.
upload_settings = {
    "display_name": "cifar10_" + TIMESTAMP,
    "artifact_uri": MODEL_DIR,
    "serving_container_image_uri": DEPLOY_IMAGE,
    "explanation_parameters": parameters,
    "explanation_metadata": metadata,
    "sync": False,
}
model = aip.Model.upload(**upload_settings)

# The upload was started with sync=False, so block here until it has finished.
model.wait()

E0601 17:09:47.748983373      68 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


INFO:google.cloud.aiplatform.models:Creating Model
INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/581970904807/locations/us-central1/models/8523189838147485696/operations/1788273817680871424


In [8]:
# Re-attach to an existing model by its numeric ID; this is the ID that appears
# in the "Create Model backing LRO" log line of the upload cell.
# NOTE(review): the ID is hard-coded for one specific run and replaces the
# `model` returned by the upload — update or remove it before re-running.
model = aip.Model("8523189838147485696")

In [11]:
DEPLOYED_NAME = "cifar10-" + TIMESTAMP

TRAFFIC_SPLIT = {"0": 100}

MIN_NODES = 1
MAX_NODES = 1

# Machine type for serving, and accelerator count used only when DEPLOY_GPU is
# set. Both names were referenced by the GPU branch without being defined.
DEPLOY_COMPUTE = "n1-standard-4"
DEPLOY_NGPU = 1

# Arguments shared by CPU and GPU deployments; the replica counts now come
# from MIN_NODES / MAX_NODES in both cases.
deploy_kwargs = dict(
    deployed_model_display_name=DEPLOYED_NAME,
    traffic_split=TRAFFIC_SPLIT,
    machine_type=DEPLOY_COMPUTE,
    min_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
)
if DEPLOY_GPU:
    # Accept either an enum-like object exposing .name or a plain type string.
    deploy_kwargs["accelerator_type"] = getattr(DEPLOY_GPU, "name", DEPLOY_GPU)
    deploy_kwargs["accelerator_count"] = DEPLOY_NGPU

endpoint = model.deploy(**deploy_kwargs)

INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/581970904807/locations/us-central1/endpoints/8845236793923076096/operations/6005894868713340928
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/581970904807/locations/us-central1/endpoints/8845236793923076096
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/581970904807/locations/us-central1/endpoints/8845236793923076096')
INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/581970904807/locations/us-central1/endpoints/8845236793923076096
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/581970904807/locations/us-central1/endpoints/8845236793923076096/operations/8043773700098490368
INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/58197090480

In [12]:
# One census record to score, written as name -> value so every value is
# labelled. The insertion order is the feature order listed in COLUMNS.
_SAMPLE_RECORD = {
    "age": 31,
    "workclass": "Private",
    "fnlwgt": 45781,
    "education": "Masters",
    "education-num": 14,
    "marital-status": "Never-married",
    "occupation": "Prof-specialty",
    "relationship": "Not-in-family",
    "race": "White",
    "sex": "Female",
    "capital-gain": 14084,
    "capital-loss": 0,
    "hours-per-week": 50,
    "native-country": "United-States",
}
# The endpoint receives plain value lists: one inner list per record.
INSTANCE = list(_SAMPLE_RECORD.values())
instances = [INSTANCE]

In [13]:
# Request an online prediction for the sample record. The call is the last
# expression of the cell, so the returned Prediction object is displayed as
# the cell output.
endpoint.predict(instances=instances)


Prediction(predictions=[1.0], deployed_model_id='151987691330732032', explanations=None)

In [15]:
# Send the same record through explain(), which returns the prediction along
# with per-feature attributions, and print the full response.
explain_request = {"instances": instances}
prediction = endpoint.explain(**explain_request)
print(prediction)

Prediction(predictions=[1.0], deployed_model_id='151987691330732032', explanations=[attributions {
  instance_output_value: 1.0
  feature_attributions {
    struct_value {
      fields {
        key: "age"
        value {
          list_value {
            values {
              number_value: 0.274
            }
          }
        }
      }
      fields {
        key: "capital-gain"
        value {
          list_value {
            values {
              number_value: 0.4229999999999999
            }
          }
        }
      }
      fields {
        key: "capital-loss"
        value {
          list_value {
            values {
              number_value: 0.0
            }
          }
        }
      }
      fields {
        key: "education"
        value {
          list_value {
            values {
              number_value: 0.0
            }
          }
        }
      }
      fields {
        key: "education-num"
        value {
          list_value {
            values {
   