In [68]:
from kfp.v2.dsl import component, Output, Artifact, Input

@component(packages_to_install=["pandas", "gcsfs"])
def prepare_data(dataset_train: str, dataset_test: str, train_uri: Output[Artifact], test_uri: Output[Artifact]):
    """Normalize the train/test CSVs and write them out as pipeline artifacts.

    Feature columns are standardized with the mean and standard deviation
    computed on the *training* set only; the same statistics are then applied
    to the test set. The 'MPG' label column is left untouched and appended
    back as the last column of each output file.

    Args:
        dataset_train: Path/URI of the training CSV (must contain an 'MPG' column).
        dataset_test: Path/URI of the test CSV (must contain an 'MPG' column).
        train_uri: Output artifact; the normalized training CSV is written to its path.
        test_uri: Output artifact; the normalized test CSV is written to its path.
    """
    import pandas as pd

    train_dataset = pd.read_csv(dataset_train)
    test_dataset = pd.read_csv(dataset_test)

    # Summary statistics of the training features (label column excluded),
    # transposed so that each feature is a row with 'mean' / 'std' columns.
    train_stats = train_dataset.describe().drop(columns="MPG").transpose()
    feature_mean = train_stats['mean']
    # A constant feature has std == 0, which would turn the whole column into
    # NaN (0 / 0). Dividing by 1 instead maps such a column to all zeros.
    feature_std = train_stats['std'].replace(0, 1)

    train_labels = train_dataset.pop('MPG')
    test_labels = test_dataset.pop('MPG')

    def norm(x):
        # DataFrame - Series aligns the Series index with the frame's columns.
        return (x - feature_mean) / feature_std

    normed_train_data = pd.concat([norm(train_dataset), train_labels], axis=1)
    normed_test_data = pd.concat([norm(test_dataset), test_labels], axis=1)

    normed_train_data.to_csv(train_uri.path, index=False)
    normed_test_data.to_csv(test_uri.path, index=False)

In [69]:
# TensorFlow is pinned to the 2.10 line so the exported SavedModel matches the
# tf2-cpu.2-10 serving container used by the pipeline, and so that
# `model.save(<directory>)` keeps producing a SavedModel (newer releases ship
# Keras 3, which rejects a path without a .keras/.h5 extension).
@component(packages_to_install=["tensorflow==2.10.1", "pandas"])
def train_component(train_uri: Input[Artifact], test_uri: Input[Artifact], model_uri: str):
    """Train a small dense regression network on the normalized data and export it.

    Args:
        train_uri: Input artifact holding the normalized training CSV
            (feature columns plus an 'MPG' label column).
        test_uri: Input artifact holding the normalized test CSV (same layout);
            used only to log held-out metrics after training.
        model_uri: Destination the trained model is saved to.
    """
    import pandas as pd
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers

    ## Load data
    normed_train_data = pd.read_csv(train_uri.path)
    normed_test_data = pd.read_csv(test_uri.path)

    train_labels = normed_train_data.pop('MPG')
    test_labels = normed_test_data.pop('MPG')

    # Log a preview only; the full frame is not useful in the step logs.
    print(normed_train_data.head())

    ## Building Model Function (neural network with 2 hidden layers, 64 neurons each, relu as activation layer).
    def build_model():
        model = keras.Sequential([
            layers.Dense(64, activation='relu', input_shape=[len(normed_train_data.keys())]),
            layers.Dense(64, activation='relu'),
            layers.Dense(1)
        ])

        optimizer = tf.keras.optimizers.RMSprop(0.001)
        model.compile(loss='mse',
                      optimizer=optimizer,
                      metrics=['mae', 'mse'])
        return model

    model = build_model()
    EPOCHS = 1000

    # The patience parameter is the amount of epochs to check for improvement
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    model.fit(normed_train_data, train_labels, epochs=EPOCHS, validation_split=0.2, callbacks=[early_stop])

    # Report performance on the held-out test split. evaluate() returns the
    # loss followed by the compiled metrics, i.e. [mse loss, mae, mse].
    test_loss, test_mae, test_mse = model.evaluate(normed_test_data, test_labels, verbose=2)
    print(f"Test set -> loss: {test_loss:.4f}, mae: {test_mae:.4f}, mse: {test_mse:.4f}")

    # Export model and save to GCS
    model.save(model_uri)

In [84]:
from kfp.v2.dsl import pipeline
from kfp.v2.dsl import importer
from google_cloud_pipeline_components import aiplatform as gcc
from google_cloud_pipeline_components.types import artifact_types

# NOTE: this function is named `pipeline`, which rebinds the `pipeline`
# decorator imported above once the cell has run. The name is kept because the
# compile cell passes `pipeline_func=pipeline`.
@pipeline(name="kfp-tf-mpg")
def pipeline(
    model_uri: str,
    project: str,
    dataset_train: str = "gs://vtx-datasets/public/mpg/train.csv",
    dataset_test: str = "gs://vtx-datasets/public/mpg/test.csv",
):
    """Prepare data, train the MPG model, upload it and deploy it to an endpoint.

    Args:
        model_uri: Location the training step saves the model to; the same
            location is imported as an unmanaged container model for upload.
        project: Project passed to the model-upload and endpoint-create steps.
        dataset_train: Training CSV location (defaults to the public MPG train set).
        dataset_test: Test CSV location (defaults to the public MPG test set).
    """
    prepare_data_job = prepare_data(
        dataset_train=dataset_train,
        dataset_test=dataset_test,
    )
    train_component_job = train_component(
        train_uri=prepare_data_job.outputs["train_uri"],
        test_uri=prepare_data_job.outputs["test_uri"],
        model_uri=model_uri,
    )
    # Wrap the saved model location as an UnmanagedContainerModel artifact,
    # attaching the serving container image to use for predictions.
    import_unmanaged_model_task = importer(
        artifact_uri=model_uri,
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            "containerSpec": {
                "imageUri": "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-10:latest"
            }
        },
    )
    model_upload_job = gcc.ModelUploadOp(
        project=project,
        display_name="kfp-mpg-model",
        unmanaged_container_model=import_unmanaged_model_task.outputs["artifact"],
    )
    # The upload has no data dependency on training (it only receives the URI),
    # so the ordering must be declared explicitly.
    model_upload_job.after(train_component_job)
    create_endpoint_op = gcc.EndpointCreateOp(
        project=project,
        display_name="kfp-mpg-endpoint",
    )
    model_deploy_op = gcc.ModelDeployOp(
        model=model_upload_job.outputs["model"],
        endpoint=create_endpoint_op.outputs['endpoint'],
        dedicated_resources_machine_type='n1-standard-4',
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )

In [85]:
from kfp.v2 import compiler
# Compile the pipeline function into the JSON file that the PipelineJob
# below loads as its template.
compiler.Compiler().compile(
    package_path='kfp-tf-mpg.json',
    pipeline_func=pipeline,
)

In [86]:
import google.cloud.aiplatform as aip

# Build a pipeline run from the compiled template. `parameter_values` supplies
# the two required pipeline parameters: the model save location and the project.
job = aip.PipelineJob(
    template_path="kfp-tf-mpg.json",
    display_name="kfp-v2",
    parameter_values=dict(
        model_uri="gs://vtx-models/mpg-kfp/",
        project="jchavezar-demo",
    ),
    pipeline_root="gs://vtx-pipe-root/",
)

# Submit the run.
job.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/569083142710/locations/us-central1/pipelineJobs/kfp-tf-mpg-20221122131032
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/569083142710/locations/us-central1/pipelineJobs/kfp-tf-mpg-20221122131032')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/kfp-tf-mpg-20221122131032?project=569083142710
