In [None]:
#####################################################################
#
#   aiplatform.CustomPythonPackageTrainingJob -> python application
#
# https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage2/get_started_vertex_training.ipynb
#
#####################################################################

In [None]:
#####################################################################
#
# create python application structure
#
#####################################################################

In [None]:
# create the package root and the nested trainer/ directory in one step (-p makes this re-runnable)
! mkdir -p mytrainingpackage/trainer

In [None]:
#-------------------
# setup.py
#-------------------

In [None]:
%%writefile mytrainingpackage/setup.py

from setuptools import find_packages
from setuptools import setup

REQUIRED_PACKAGES = ["gcsfs", "google-cloud-aiplatform"]

setup(
    name = 'trainer',
    version = '0.1',
    packages = find_packages(),
    install_requires = REQUIRED_PACKAGES,
    include_package_data = True,
    description = 'My training application.'
)

In [None]:
#-------------------
# setup.cfg
#-------------------

In [None]:
%%writefile mytrainingpackage/setup.cfg

# Options for the setuptools egg_info command.
[egg_info]

# Build tag left empty.
tag_build =

# NOTE(review): 0 presumably turns off the date suffix on the version -- confirm against setuptools docs.
tag_date = 0

In [None]:
#-------------------
# PKG-INFO
#-------------------

In [None]:
%%writefile mytrainingpackage/PKG-INFO

Metadata-Version: 1.0

Name: model training

Version: 0.0.1

Summary: Demonstration training script

Author-email: adampilz@google.com

License: Public

Description: Demo

Platform: Vertex

In [None]:
#-------------------
# README.txt
#-------------------

In [None]:
%%writefile mytrainingpackage/README.txt

# Example Package

This is an example package.

In [None]:
#-------------------
# __init__.py
#-------------------

In [None]:
# an (empty) __init__.py makes trainer/ an importable package, addressed later as "trainer.task"
! touch mytrainingpackage/trainer/__init__.py

In [None]:
#-------------------
# task.py
#-------------------

In [None]:
%%writefile mytrainingpackage/trainer/task.py

import argparse
import pickle
import os

# Sample Decision Tree Classifier
from sklearn import datasets
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

from google.cloud import storage

# parse args
parser = argparse.ArgumentParser()
parser.add_argument('--bucket_name', dest='bucket_name', default="", type=str, help='The GCS bucket to store model artifacts -> w/o gs://')
parser.add_argument('--max_depth', dest='max_depth', default=10, type=int, help='The maximum depth of the tree')
args = parser.parse_args()

# load the iris datasets
dataset = datasets.load_iris()

# fit a CART model to the data
model = DecisionTreeClassifier(max_depth = args.max_depth)
model.fit(dataset.data, dataset.target)
print(model)

# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)

# summarize the fit of the model
classification_report = metrics.classification_report(expected, predicted)
confusion_matrix = metrics.confusion_matrix(expected, predicted)

# save the model to disk
model_filename = "model.pkl"
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Upload the saved model file to GCS
storage_client = storage.Client()
bucket = storage_client.get_bucket(args.bucket_name)
model_directory = os.environ["AIP_MODEL_DIR"]
storage_path = os.path.join(model_directory, model_filename)
blob = storage.blob.Blob.from_string(storage_path, client=storage_client)
blob.upload_from_filename(model_filename)

In [None]:
#####################################################################
#
# upload and kick off the job
#
#####################################################################

In [None]:
# package the training folder into a compressed tar ball
! rm -f mytrainingpackage.tar mytrainingpackage.tar.gz
! tar cvf mytrainingpackage.tar mytrainingpackage
! gzip mytrainingpackage.tar

In [None]:
# Store training script on your Cloud Storage bucket
! gsutil cp mytrainingpackage.tar.gz $BUCKET_PATH/custompackagetesting/mytrainingpackage.tar.gz

In [None]:
#####################################################################
#
# save dist to GCS then to artifact registry
#
#####################################################################

In [None]:
from google.cloud import aiplatform
from datetime import datetime

In [None]:
# specify parameters
P = ! gcloud config list --format 'value(core.project)'
PROJECT_ID = P[0]
REGION = "us-central1"
BUCKET_NAME = f"bkt-{PROJECT_ID}-vpipelines"
BUCKET_PATH = f"gs://{BUCKET_NAME}"
PIPELINE_ROOT = f"{BUCKET_PATH}/pipeline_root"
PIPELINE_DATA = f"{BUCKET_PATH}/data"
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

In [None]:
# Version tags of the training and prediction container images.
TRAIN_VERSION = "scikit-learn-cpu.0-23"
DEPLOY_VERSION = "sklearn-cpu.0-23"

# Full image URIs built from the version tags above.
TRAIN_IMAGE = f"us-docker.pkg.dev/vertex-ai/training/{TRAIN_VERSION}:latest"
DEPLOY_IMAGE = f"us-docker.pkg.dev/vertex-ai/prediction/{DEPLOY_VERSION}:latest"

In [None]:
# Display names for the training job and for the model it produces.
JOB_DISPLAY_NAME = "vai_CustomPythonPackageTrainingJob"
MODEL_DISPLAY_NAME = "vai_ModelRegName_CustomPythonPackageTrainingJob"

# Location of the uploaded source archive and the module to run inside it.
# https://cloud.google.com/vertex-ai/docs/training/create-python-pre-built-container#python-modules
PYTHON_PACKAGE_GCS_URI = BUCKET_PATH + "/custompackagetesting/mytrainingpackage.tar.gz"
PYTHON_MODULE_NAME = "trainer.task"

# Command-line arguments handed to trainer.task.
MAX_DEPTH = 20
CMDARGS = [
    "--bucket_name=" + BUCKET_NAME,
    "--max_depth=" + str(MAX_DEPTH),
]

In [None]:
# Point the SDK at the project, region and staging bucket chosen above.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_PATH,
)

# Define the training job: which package/module to run, in which training
# container, and which container image to attach to the resulting model for serving.
job = aiplatform.CustomPythonPackageTrainingJob(
    display_name=JOB_DISPLAY_NAME,
    python_package_gcs_uri=PYTHON_PACKAGE_GCS_URI,
    python_module_name=PYTHON_MODULE_NAME,
    container_uri=TRAIN_IMAGE,
    model_serving_container_image_uri=DEPLOY_IMAGE,
)

In [None]:
# Launch the training job on a single n1-standard-4 replica under the pipeline
# service account; the value returned by run() is kept in `model`.
model = job.run(
    model_display_name=MODEL_DISPLAY_NAME,
    args=CMDARGS,
    replica_count=1,
    machine_type="n1-standard-4",
    service_account=f"sa-vertex-pipelines@{PROJECT_ID}.iam.gserviceaccount.com",
)