In [None]:
#####################################################################
#
#       aiplatform.CustomJob.from_local_script
#
#####################################################################

In [None]:
from datetime import datetime
import os

# google
from google.cloud import aiplatform

In [None]:
# specify parameters
P = ! gcloud config list --format 'value(core.project)'
PROJECT_ID = P[0]
REGION = "us-central1"

SERVICE_ACCOUNT = f"sa-vertex-pipelines@{PROJECT_ID}.iam.gserviceaccount.com"
NETWORK = "vpc-adam-default"

USE_CASE = "custom-training"
ML_FRAMEWORK = "scikit"
MODEL_TYPE = "binclass"
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# GCS inputs
GCS_BUCKET_NAME = f"bkt-{REGION}-{USE_CASE}"
GCS_BUCKET_PATH = f"gs://{GCS_BUCKET_NAME}"
GCS_BUCKET_PATH_DATA = f"{GCS_BUCKET_PATH}/data"
GCS_BUCKET_PATH_CONFIGS = f"{GCS_BUCKET_PATH}/configs"
GCS_BUCKET_PATH_TMP = f"{GCS_BUCKET_PATH}/tmp"
GCS_BUCKET_PATH_STAGING = f"{GCS_BUCKET_PATH}/staging"

TRAIN_DS = "tab_class_10inps_1krows_tes_3498.csv"

In [None]:
#####################################################################
#
# define the training script
#
#####################################################################

In [None]:
%%writefile training_script.py

import os
import argparse
from google.cloud import storage


# Upload the saved model file to GCS -> using the client library
model_filename = "env_vars_client.txt"
with open(model_filename, 'w') as f:    
    for k, v in os.environ.items():
        f.write(f"{k}={v}" + "\n")

storage_client = storage.Client()
model_directory = os.environ["AIP_MODEL_DIR"]
storage_path = os.path.join(model_directory, model_filename)
blob = storage.blob.Blob.from_string(storage_path, client=storage_client)
blob.upload_from_filename(model_filename)


# Upload the saved model file to GCS -> using the client library
model_filename = "env_vars_fuse.txt"
with open(f"/gcs/{args.bucket_name}/model/{model_filename}", 'w') as f:
    for k, v in os.environ.items():
        f.write(f"{k}={v}" + "\n")

In [None]:
#####################################################################
#
# kick off the custom training job
#
#####################################################################

In [None]:
from google.cloud import aiplatform

In [None]:
# Set the default project and region for subsequent SDK calls.
aiplatform.init(project=PROJECT_ID, location=REGION)

# Build a CustomJob from the local training_script.py written above.
job = aiplatform.CustomJob.from_local_script(
    # Identification
    display_name=f"CustomJob_fromLocalScript_{TIMESTAMP}",
    project=PROJECT_ID,
    location=REGION,
    labels={"my_key": "my_value"},
    # Code, container image and extra requirements
    staging_bucket=GCS_BUCKET_PATH,
    script_path="training_script.py",
    container_uri="us-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.0-23:latest",
    requirements=["gcsfs"],
    # Compute: a single CPU-only replica
    replica_count=1,
    machine_type="n1-standard-4",
    accelerator_count=0,
    # Inputs handed to the script: CLI arguments and environment variables
    args=[f"--bucket_name={GCS_BUCKET_NAME}", f"--train_ds={TRAIN_DS}"],
    environment_variables={"MY_KEY": "MY_VALUE"},
)

In [None]:
# Run the job under the service account defined in the parameters cell;
# reusing the constant keeps a single source of truth for the identity.
job.run(service_account=SERVICE_ACCOUNT)