**Check environment dependencies**

In [1]:
! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
! python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"
! python3 -c "import sklearn; print('Sklearn version: {}'.format(sklearn.__version__))"

KFP SDK version: 1.8.14
google_cloud_pipeline_components version: 1.0.26
Sklearn version: 1.1.2


In [2]:
from datetime import datetime

import google.cloud.aiplatform as aip
from google.cloud import aiplatform
import kfp
from kfp.v2 import dsl, compiler
from kfp.v2.google.client import AIPlatformClient

# custom code for data processing and model training
from utils import create_data, train_model

**Define environment variables**

User should update the <code>BUCKET_NAME</code>. The <code>PROJECT_ID</code> is picked up based on the gcloud configuration

In [3]:
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET_NAME = "black-friday-dataset-test"  # modify
BUCKET_URI = f"gs://{BUCKET_NAME}"
REGION = "us-central1"
PIPELINE_ROOT = "{}/pipeline_root/black_friday".format(BUCKET_URI)
DISPLAY_NAME = "black-friday-" + TIMESTAMP
PACKAGE_PATH = "pipeline.json"
project_id_shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
PROJECT_ID = project_id_shell_output[0]

## Data processing and model training

**Initialize the client**

In [4]:
aip.init(project=PROJECT_ID, staging_bucket=BUCKET_URI)

**Define the pipeline**

In [5]:
@dsl.pipeline(
    pipeline_root=PIPELINE_ROOT,
    name="black-friday-pipeline",
)
def pipeline(
    train_file_x: str,
    train_file_y: str,
    test_file_x: str,
    test_file_y: str,
    best_params_file: str,
    metrics_file: str,
    num_iterations: int,
    hp_tune: bool,
):

    create_data_task = create_data(
        project_id="mwpmltr", bucket_name=BUCKET_NAME, dataset_id="black_friday"
    )

    train_model_task = train_model(
        hp_tune=hp_tune,
        project_id="mwpmltr",
        bucket_name=BUCKET_NAME,
        num_iterations=num_iterations,
        train_file_x=create_data_task.outputs["train_file_x"],
        test_file_x=create_data_task.outputs["test_file_x"],
        train_file_y=create_data_task.outputs["train_file_y"],
        test_file_y=create_data_task.outputs["test_file_y"],
    )

In [6]:
compiler.Compiler().compile(pipeline_func=pipeline, package_path=PACKAGE_PATH)



**Submit the pipeline to Vertex AI Pipeline**

In [7]:
job = aip.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path=PACKAGE_PATH,
    pipeline_root=PIPELINE_ROOT,
    parameter_values={
        "train_file_x": "x_train.csv",
        "train_file_y": "y_train.csv",
        "test_file_x": "x_test.csv",
        "test_file_y": "y_test.csv",
        "best_params_file": "best_params.json",
        "metrics_file": "metrics.json",
        "num_iterations": 100,
        "hp_tune": True,
    },
)

job.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/55590906972/locations/us-central1/pipelineJobs/black-friday-pipeline-20221102205557
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/55590906972/locations/us-central1/pipelineJobs/black-friday-pipeline-20221102205557')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/black-friday-pipeline-20221102205557?project=55590906972
PipelineJob projects/55590906972/locations/us-central1/pipelineJobs/black-friday-pipeline-20221102205557 current state:
PipelineState.PIPELINE_STATE_PENDING
PipelineJob projects/55590906972/locations/us-central1/pipelineJobs/black-friday-pipeline-20221102205557 current state:
PipelineState.PIPELINE_STATE_PENDING
PipelineJob projects/55590906972/locations/us-central1/pipelineJobs/black-friday-pipeline-20221102205557 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/55590906972/location

**Copy the model to your local directory**

Get the URI of the model file by clicking through the Vertex AI Pipeline UI. Select the model artifact and follow the path to the model

In [8]:
! gsutil cp gs://black-friday-dataset-test/pipeline_root/black_friday/55590906972/black-friday-pipeline-20221102205557/train-model_-8375679907920871424/model_file.pkl model.pkl



Updates are available for some Google Cloud CLI components.  To install them,
please run:
  $ gcloud components update

Copying gs://black-friday-dataset-test/pipeline_root/black_friday/55590906972/black-friday-pipeline-20221102205557/train-model_-8375679907920871424/model_file.pkl...
==> NOTE: You are downloading one or more large file(s), which would            
run significantly faster if you enabled sliced object downloads. This
feature is enabled by default but requires that compiled crcmod be
installed (see "gsutil help crcmod").

| [1 files][519.2 MiB/519.2 MiB]   1001 KiB/s                                   
Operation completed over 1 objects/519.2 MiB.                                    


## Model Deployment

**Set environment variables for command line arguments**

For custom prediction routines, a docker image must be provided

In [None]:
%env PROJECT_ID={PROJECT_ID}
%env REGION={REGION}
%env REPOSITORY=black-friday-v1
%env IMAGE=black-friday-image

In [None]:
# build image
!docker build --tag=$REGION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$IMAGE .

In [None]:
# create repository in artifact repository
! gcloud artifacts repositories create $REPOSITORY  \
                             --repository-format=docker \
                             --location=$REGION

In [None]:
# push docker image to the newly created artifact repository
! docker push $REGION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$IMAGE

In [None]:
# upload model to Vertex AI  model registry
! gcloud ai models upload \
  --region=us-central1 \
  --display-name=black-friday-model \
  --container-image-uri=$REGION-docker.pkg.dev/$PROJECT_ID/$REPOSITORY/$IMAGE \
  --container-ports=5005 \
  --container-health-route=/healthz \
  --container-predict-route=/predict

In [None]:
# list models to double check
!gcloud ai models list \
  --region=us-central1 \
  --filter=display_name=black-friday-model

In [None]:
# create a Vertex AI endpoint
!gcloud ai endpoints create \
  --region=us-central1 \
  --display-name=black-friday-test2

**Deploy the model to the endpoint**

The model endpoint is collected from the previous cell. The model id is collected from the list of models

In [None]:

!gcloud ai endpoints deploy-model 4126638662857261056 \
  --region=us-central1 \
  --model=7845406890321248256 \
  --display-name=black-friday-model \
  --machine-type=n1-standard-4 \
  --min-replica-count=1 \
  --max-replica-count=2 

In [12]:
import pandas as pd
d = pd.read_csv("gs://black-friday-dataset-test/pipeline_root/black_friday/55590906972/black-friday-pipeline-20221102205557/create-data_847692128933904384/train_file_x")

In [13]:
d.shape

(4716, 82)

In [18]:
metrics = pd.read_csv("gs://black-friday-dataset-test/pipeline_root/black_friday/55590906972/black-friday-pipeline-20221102205557/train-model_-8375679907920871424/metrics_file.json")

In [16]:
metrics

Unnamed: 0,"{""iteraion_0_accuracy"": 0.036016949152542374","""iteraion_1_accuracy"": 0.037076271186440676","""iteraion_2_accuracy"": 0.036016949152542374","""iteraion_3_accuracy"": 0.0423728813559322","""iteraion_4_accuracy"": 0.046610169491525424","""iteraion_5_accuracy"": 0.036016949152542374","""iteraion_6_accuracy"": 0.04766949152542373","""iteraion_7_accuracy"": 0.048728813559322036","""iteraion_8_accuracy"": 0.04343220338983051","""iteraion_9_accuracy"": 0.04978813559322034",...,"""iteraion_91_accuracy"": 0.028601694915254237","""iteraion_92_accuracy"": 0.03283898305084746","""iteraion_93_accuracy"": 0.037076271186440676","""iteraion_94_accuracy"": 0.037076271186440676","""iteraion_95_accuracy"": 0.046610169491525424","""iteraion_96_accuracy"": 0.04766949152542373","""iteraion_97_accuracy"": 0.04025423728813559","""iteraion_98_accuracy"": 0.0423728813559322","""iteraion_99_accuracy"": 0.05084745762711865","""final_model_accuracy"": ""4""}"
