In [2]:
import os

# The presence of this file is treated as the signal that the notebook is running
# on the Google Cloud Notebook product, which has specific requirements.
IS_GOOGLE_CLOUD_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# Google Cloud Notebook requires dependencies to be installed with '--user';
# everywhere else no extra pip flag is used.
USER_FLAG = "--user" if IS_GOOGLE_CLOUD_NOTEBOOK else ""
print(USER_FLAG)

--user


In [3]:
! pip install {USER_FLAG} google-cloud-aiplatform --upgrade
! pip install {USER_FLAG} kfp google-cloud-pipeline-components --upgrade

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.3.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 5.2 MB/s eta 0:00:01
Installing collected packages: google-cloud-aiplatform
Successfully installed google-cloud-aiplatform-1.3.0
Collecting kfp
  Downloading kfp-1.7.1.tar.gz (233 kB)
[K     |████████████████████████████████| 233 kB 4.8 MB/s eta 0:00:01
[?25hCollecting google-cloud-pipeline-components
  Downloading google_cloud_pipeline_components-0.1.5-py3-none-any.whl (87 kB)
[K     |████████████████████████████████| 87 kB 9.3 MB/s  eta 0:00:01
[?25hCollecting absl-py<=0.11,>=0.9
  Downloading absl_py-0.11.0-py3-none-any.whl (127 kB)
[K     |████████████████████████████████| 127 kB 62.6 MB/s eta 0:00:01
Collecting kubernetes<13,>=8.0.0
  Downloading kubernetes-12.0.1-py2.py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 27.0 MB/s eta 0:00:01
[?25hCollecting google-api-python-client<2,>=1.7.8
  Do

In [2]:
import os
# Show whether the interactive (non-test) code paths will run: True unless the
# IS_TESTING environment variable is set to a non-empty value.
print(not os.environ.get("IS_TESTING"))

True


In [3]:
# Run a separate python3 process that imports kfp and prints its version string.
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"

KFP SDK version: 1.7.1


In [4]:

import os

PROJECT_ID = ""

# Get your Google Cloud project ID from gcloud
if not os.getenv("IS_TESTING"):
    shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID: ", PROJECT_ID)

Project ID:  dna-verizonpoc


In [5]:

from datetime import datetime

# Second-resolution timestamp (YYYYmmddHHMMSS) of when this cell ran; it is appended
# to the pipeline name further down.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

In [7]:
# BUCKET_NAME: Cloud Storage bucket URI (including the gs:// scheme) that
# PIPELINE_ROOT is built from.
# REGION: region passed to AIPlatformClient when the pipeline client is created.
BUCKET_NAME = "gs://verexai_automl_text_data"  # @param {type:"string"}
REGION = "us-central1"  # @param {type:"string"}

In [8]:
PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

USER = ""  # <---CHANGE THIS
PIPELINE_ROOT = "{}/pipeline_root/{}".format(BUCKET_NAME, "custommodel")

PIPELINE_ROOT

env: PATH=/opt/conda/bin:/opt/conda/condabin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/home/jupyter/.local/bin


'gs://verexai_automl_text_data/pipeline_root/custommodel'

In [9]:
import kfp
from google.cloud import aiplatform
from google_cloud_pipeline_components import aiplatform as gcc_aip
from kfp.v2 import compiler
from kfp.v2.dsl import component
from kfp.v2.google import experimental
from kfp.v2.google.client import AIPlatformClient

In [14]:
# create args list for trainer

# Hyperparameters, passed to the trainer as a JSON string via `--hptune-dict`.
hp_dict: str = '{"num_hidden_layers": 3, "hidden_size": 32, "learning_rate": 0.01, "epochs": 1, "steps_per_epoch": -1}'
# Derive the data location from PIPELINE_ROOT instead of repeating the bucket path,
# so changing BUCKET_NAME cannot leave the trainer pointing at a stale location.
# With the current configuration this is the same URI as before.
data_dir: str = f"{PIPELINE_ROOT}/data_dir/"
TRAINER_ARGS = ["--data-dir", data_dir, "--hptune-dict", hp_dict]

# create working dir to pass to job spec
import time

# Epoch seconds make the working directory and model display name unique per run of
# this cell.
ts = int(time.time())
WORKING_DIR = f"{PIPELINE_ROOT}/{ts}"

MODEL_DISPLAY_NAME = f"train_deploy{ts}"
print(TRAINER_ARGS, WORKING_DIR, MODEL_DISPLAY_NAME)

['--data-dir', 'gs://verexai_automl_text_data/pipeline_root/custommodel/data_dir/', '--hptune-dict', '{"num_hidden_layers": 3, "hidden_size": 32, "learning_rate": 0.01, "epochs": 1, "steps_per_epoch": -1}'] gs://verexai_automl_text_data/pipeline_root/custommodel/1629276215 train_deploy1629276215


In [13]:
!gsutil cp gs://aju-dev-demos-codelabs/bikes_weather/* gs://verexai_automl_text_data/pipeline_root/custommodel/data_dir/

Copying gs://aju-dev-demos-codelabs/bikes_weather/test-bw000000000008.csv [Content-Type=application/octet-stream]...
Copying gs://aju-dev-demos-codelabs/bikes_weather/test-bw000000000009.csv [Content-Type=application/octet-stream]...
Copying gs://aju-dev-demos-codelabs/bikes_weather/train-bw000000000000.csv [Content-Type=application/octet-stream]...
Copying gs://aju-dev-demos-codelabs/bikes_weather/train-bw000000000001.csv [Content-Type=application/octet-stream]...
/ [4 files][  1.3 GiB/  1.3 GiB]      0.0 B/s                                   
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://aju-dev-demos-codelabs/bikes_weather/train-bw000000000002.csv [Content-Type=application/octet-stream]...
Copying gs://aju-dev-demos-codelabs/bikes_weather/train-bw000000000003.csv [Con

In [20]:

# Minimal component whose body only prints its input; the pipeline hands the task
# created from it to run_as_aiplatform_custom_job.
@component
def training_op(input1: str):
    print(f"training task: {input1}")

In [21]:
# Pipeline definition: a training task run as a custom job, followed by model upload,
# endpoint creation, and deployment of the uploaded model to that endpoint.
# Parameters (all overridable at submission time):
#   project                      -- project passed to the upload/endpoint/deploy steps
#   model_display_name           -- display name for the uploaded and deployed model
#   serving_container_image_uri  -- serving image used by the model upload step
@kfp.dsl.pipeline(name="train-endpoint-deploy" + TIMESTAMP)
def pipeline(
    project: str = PROJECT_ID,
    model_display_name: str = MODEL_DISPLAY_NAME,
    serving_container_image_uri: str = "us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-3:latest",
):

    # Create the training task, then hand it to run_as_aiplatform_custom_job together
    # with a single worker pool spec: the training image, its arguments, and one
    # n1-standard-4 replica.
    train_task = training_op("model training")
    experimental.run_as_aiplatform_custom_job(
        train_task,
        worker_pool_specs=[
            {
                "containerSpec": {
                    "args": TRAINER_ARGS,
                    # AIP_MODEL_DIR is set to WORKING_DIR, the same URI the upload
                    # step below uses as artifact_uri (presumably where the trainer
                    # writes the model -- TODO confirm against the training image).
                    "env": [{"name": "AIP_MODEL_DIR", "value": WORKING_DIR}],
                    "imageUri": "gcr.io/google-samples/bw-cc-train:latest",
                },
                "replicaCount": "1",
                "machineSpec": {
                    "machineType": "n1-standard-4",
                },
            }
        ],
    )

    # Upload the artifacts found at WORKING_DIR as a model.
    model_upload_op = gcc_aip.ModelUploadOp(
        project=project,
        display_name=model_display_name,
        artifact_uri=WORKING_DIR,
        serving_container_image_uri=serving_container_image_uri,
        # NOTE(review): placeholder environment variable; looks like a workaround --
        # confirm whether it is still required.
        serving_container_environment_variables={"NOT_USED": "NO_VALUE"},
    )
    # No output of train_task is consumed by the upload step, so the ordering has to
    # be declared explicitly.
    model_upload_op.after(train_task)

    # Create the endpoint; it takes no inputs from the steps above.
    endpoint_create_op = gcc_aip.EndpointCreateOp(
        project=project,
        display_name="pipelines-created-endpoint",
    )

    # Deploy the uploaded model to the new endpoint. The result is not used afterwards,
    # hence the unused-variable suppression.
    model_deploy_op = gcc_aip.ModelDeployOp(  # noqa: F841
        project=project,
        endpoint=endpoint_create_op.outputs["endpoint"],
        model=model_upload_op.outputs["model"],
        deployed_model_display_name=model_display_name,
        machine_type="n1-standard-4",
    )

In [16]:
# List the container images available under the gcr.io/google-samples repository
# (the training image used by the pipeline, bw-cc-train, is one of them).
!gcloud container images list --repository=gcr.io/google-samples

NAME
gcr.io/google-samples/adservice
gcr.io/google-samples/amyu_gatk_extensionsonly
gcr.io/google-samples/amyu_ml4h_reorder
gcr.io/google-samples/appengine-tensorboard
gcr.io/google-samples/automl-pipeline
gcr.io/google-samples/automl-ucaip
gcr.io/google-samples/bw-aiplatform
gcr.io/google-samples/bw-aiplatform-mb
gcr.io/google-samples/bw-cc-train
gcr.io/google-samples/bw-pipeline-tfserve
gcr.io/google-samples/bw-pl-bikes-train
gcr.io/google-samples/cartservice
gcr.io/google-samples/cassandra
gcr.io/google-samples/checkoutservice
gcr.io/google-samples/cloudbuild-kustomize
gcr.io/google-samples/container-analysis-webhook
gcr.io/google-samples/currencyservice
gcr.io/google-samples/echo-go
gcr.io/google-samples/echo-java
gcr.io/google-samples/echo-node
gcr.io/google-samples/echo-php
gcr.io/google-samples/echo-python
gcr.io/google-samples/echo-ruby
gcr.io/google-samples/emailservice
gcr.io/google-samples/env-backend
gcr.io/google-samples/env-show
gcr.io/google-samples/freshpod
gcr.io/googl

In [22]:
from kfp.v2 import compiler  # noqa: F811

# Compile the pipeline function into the job spec file that is submitted later.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(pipeline_func=pipeline, package_path="train_upload_deploy.json")

In [23]:
from kfp.v2.google.client import AIPlatformClient  # noqa: F811

# Client used to submit pipeline runs, bound to the configured project and region.
api_client = AIPlatformClient(project_id=PROJECT_ID, region=REGION)



In [24]:
# Submit the compiled job spec as a pipeline run, overriding the `project` pipeline
# parameter with the project detected earlier.
run_parameters = {"project": PROJECT_ID}
response = api_client.create_run_from_job_spec(
    "train_upload_deploy.json",
    pipeline_root=PIPELINE_ROOT,
    parameter_values=run_parameters,
)

In [25]:
# Print the executor_output.json written for the endpoint-create step of one specific
# run. The path embeds that run's identifiers, so it must be edited to inspect any
# other run.
!gsutil cat  gs://verexai_automl_text_data/pipeline_root/custommodel/885855441164/train-endpoint-deploy20210818082222-20210818090416/endpoint-create_5069065263396159488/executor_output.json

{"artifacts": {"endpoint": {"artifacts": [{"name": "projects/885855441164/locations/us-central1/metadataStores/default/artifacts/15558373097935837787", "uri": "aiplatform://v1/projects/885855441164/locations/us-central1/endpoints/93572837570248704", "metadata": {}}]}}}

In [26]:
!gsutil head gs://aju-dev-demos-codelabs/bikes_weather/train-bw000000000002.csv

Did you mean this?
	setheader
CommandException: Invalid command "head".


In [28]:
# Stream the object and keep only the first lines (head's default count) to preview
# the CSV header and a few rows.
!gsutil cat gs://aju-dev-demos-codelabs/bikes_weather/train-bw000000000002.csv |head

duration,end_station_id,bike_id,ts,day_of_week,start_station_id,start_latitude,start_longitude,end_latitude,end_longitude,euclidean,loc_cross,prcp,max,min,temp,dewp
5040,243,12433,1441557120,1,341,51.50810309,-0.12602103,51.49096258,-0.139625122,2125.886953695936,POINT(-0.13 51.51)POINT(-0.14 51.49),0.01,68,43.9,56.2,44.3
180,185,9982,1434911340,1,148,51.49211134,-0.138364847,51.48805753,-0.140741432,479.85713176080208,POINT(-0.14 51.49)POINT(-0.14 51.49),0.07,69.4,58.3,62.8,49.8
1140,540,10856,1427128620,2,457,51.542138,-0.145393,51.528302,-0.144466,1539.8309189322943,POINT(-0.15 51.54)POINT(-0.14 51.53),0,54,35.4,45.5,34.7
720,29,5083,1434645780,5,392,51.49942855,-0.179702476,51.513735,-0.193487,1854.9528279331748,POINT(-0.18 51.5)POINT(-0.19 51.51),0,71.8,57.9,64.1,47.9
1020,383,2944,1423471620,2,93,51.53440868,-0.109025404,51.51310333,-0.13121385,2822.9277534479152,POINT(-0.11 51.53)POINT(-0.13 51.51),0,50.2,35.2,42.8,36.4
780,325,12916,1420648920,4,178,51.48985626,-0.141923621,51.

In [30]:
!wget "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

--2021-08-18 10:50:55--  http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
Resolving ai.stanford.edu (ai.stanford.edu)... 171.64.68.10
Connecting to ai.stanford.edu (ai.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84125825 (80M) [application/x-gzip]
Saving to: ‘aclImdb_v1.tar.gz’


2021-08-18 10:50:59 (23.7 MB/s) - ‘aclImdb_v1.tar.gz’ saved [84125825/84125825]



In [31]:
# Unpack the downloaded gzip-compressed archive into the current directory.
!tar -xzf "aclImdb_v1.tar.gz"

In [44]:
# Take the second '.'-separated field of each file name under train/pos and print,
# with counts, only the values repeated on adjacent lines (`uniq -cd`).
# NOTE(review): for names of the form <name>.<ext> this field is the extension.
!ls ./aclImdb/train/pos/ | cut -d'.' -f2|uniq -cd

  12500 txt


In [39]:
# Show the current working directory, i.e. where the archive was downloaded and
# extracted by the relative-path commands above.
!pwd

/home/jupyter/Vertexai-Custom-Model-Covertype
