In [2]:
import os

# Environment detection for Vertex AI Workbench, which has its own requirements.
# Treated as a Workbench notebook when DL_ANACONDA_HOME is set and VIRTUAL_ENV is not.
IS_WORKBENCH_NOTEBOOK = os.environ.get("DL_ANACONDA_HOME") and not os.environ.get(
    "VIRTUAL_ENV"
)
# User-managed Workbench instances are recognised by this metadata file.
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# On Workbench, dependencies have to be installed with '--user'; elsewhere no flag is used.
USER_FLAG = "--user" if IS_WORKBENCH_NOTEBOOK else ""


In [3]:
import os
import sys

# If on Vertex AI Workbench, then don't execute this code
IS_COLAB = "google.colab" in sys.modules
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"):
        %env GOOGLE_APPLICATION_CREDENTIALS supple-life-353322-b54afcdb7429.json

env: GOOGLE_APPLICATION_CREDENTIALS=supple-life-353322-b54afcdb7429.json


In [4]:
# Google Cloud project and region used by the rest of the notebook.
PROJECT_ID = "supple-life-353322"
REGION = "us-central1"

In [5]:
from datetime import datetime

# Second-resolution timestamp (YYYYMMDDHHMMSS) taken when this cell runs.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

In [6]:
# Cloud Storage bucket name and its gs:// URI.
BUCKET_NAME = "cgjhusker-container-bucket"
BUCKET_URI = "gs://" + BUCKET_NAME

In [7]:
# Service account address for this project.
# NOTE(review): not referenced by the cells visible in this notebook — confirm it is still needed.
SERVICE_ACCOUNT = "1003672301353-compute@developer.gserviceaccount.com"

In [8]:
from typing import NamedTuple

import google.cloud.aiplatform as aip
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component, Dataset, Output, Input

In [9]:
# Regional Vertex AI endpoint host name and the GCS folder used as the pipeline root.
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/intro"

# Pass REGION explicitly: without `location`, the SDK falls back to its own
# default region and the REGION setting above would have no effect on it.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

In [10]:
@component(output_component_file="data.yaml", base_image="python:3.9", packages_to_install=['tensorflow', 'keras', 'numpy', "pyarrow"])
def parse_data(
    dataset_train: Output[Dataset],
    dataset_test: Output[Dataset]
):
    """Download Fashion-MNIST and write the prepared training arrays as .npy files.

    The training images are reshaped to (N, 28, 28, 1) and divided by 255.0;
    the training labels are converted with ``to_categorical``.

    Args:
        dataset_train: Output artifact; the scaled training images are saved
            to ``dataset_train.path + ".npy"``.
        dataset_test: Output artifact; despite its name it receives the
            converted *training labels*, saved to ``dataset_test.path + ".npy"``.
            The pipeline feeds it to ``train_model`` as ``dataset_y``.
    """
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras.utils import to_categorical

    # Only the training split is written out, so the test split is discarded.
    (train_images, train_labels), _ = tf.keras.datasets.fashion_mnist.load_data()

    # Add a trailing channel axis and divide the pixel values by 255.
    train_images_scaled = train_images.reshape((train_images.shape[0], 28, 28, 1)) / 255.0
    train_label_ohe = to_categorical(train_labels)

    # `with` guarantees the files are flushed and closed; the previous
    # `file.close` (without parentheses) never actually closed them.
    with open(dataset_train.path + ".npy", "wb") as images_file:
        np.save(images_file, train_images_scaled)

    with open(dataset_test.path + ".npy", "wb") as labels_file:
        np.save(labels_file, train_label_ohe)

In [11]:
@component(output_component_file="model.yaml", base_image="python:3.9", packages_to_install=['tensorflow', 'keras', 'numpy'])
def train_model(
    dataset_x:  Input[Dataset],
    dataset_y:  Input[Dataset],
):
    """Fit a small convolutional classifier on arrays loaded from two .npy files.

    Args:
        dataset_x: Input artifact; features are read from
            ``dataset_x.path + ".npy"`` (the model expects 28x28x1 inputs).
        dataset_y: Input artifact; targets are read from
            ``dataset_y.path + ".npy"`` (the model has a 10-unit softmax output).

    NOTE(review): the fitted model is neither saved nor returned, so the
    component currently produces no output artifact.
    """
    import numpy as np
    from tensorflow.keras import layers, models

    # Load inside `with` blocks so both file handles are closed right after
    # reading; previously they were opened and never closed.
    with open(dataset_x.path + ".npy", "rb") as images_file:
        train_images_scaled = np.load(images_file)
    with open(dataset_y.path + ".npy", "rb") as labels_file:
        train_label_ohe = np.load(labels_file)

    # Conv -> max-pool -> flatten -> dense(100) -> softmax(10).
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(100, activation='relu', kernel_initializer='he_uniform'))
    model.add(layers.Dense(10, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # 15% of the samples are held out for validation during the 5 epochs.
    model.fit(train_images_scaled, train_label_ohe, validation_split=0.15, epochs=5)

In [12]:
@dsl.pipeline(
    name="test",
    description="A simple intro pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def pipeline():
    # Step 1: produce the two dataset artifacts.
    parse_task = parse_data()
    # Step 2: train on them; `dataset_train` feeds the features input and
    # `dataset_test` feeds the targets input of train_model.
    train_model(
        dataset_x=parse_task.outputs["dataset_train"],
        dataset_y=parse_task.outputs["dataset_test"],
    )

In [13]:
from kfp.v2 import compiler  # noqa: F811

# Compile the pipeline function into a job spec written to intro_pipeline.json.
compiler.Compiler().compile(
    package_path="intro_pipeline.json",
    pipeline_func=pipeline,
)



In [14]:
# Timestamp-suffixed display name for this run.
DISPLAY_NAME = f"intro_{TIMESTAMP}"

# Build the job from the compiled spec in intro_pipeline.json.
job = aip.PipelineJob(
    template_path="intro_pipeline.json",
    pipeline_root=PIPELINE_ROOT,
    display_name=DISPLAY_NAME,
)

# Submit the job; the recorded output shows the call returning after the run completed.
job.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/1003672301353/locations/us-central1/pipelineJobs/test-20220805101849
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/1003672301353/locations/us-central1/pipelineJobs/test-20220805101849')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/test-20220805101849?project=1003672301353
PipelineJob run completed. Resource name: projects/1003672301353/locations/us-central1/pipelineJobs/test-20220805101849
