In [53]:
from kfp.v2 import dsl
from kfp.v2.dsl import (Artifact,
                        Dataset,
                        Input,
                        Output,
                        Model,
                        Metrics,
                        Markdown,
                        HTML,
                        component, 
                        OutputPath, 
                        InputPath)

from kfp.v2 import compiler
from google.cloud.aiplatform import pipeline_jobs

In [54]:
# Project and region used further down for the image URI and the job location.
PROJECT_ID = "test-house-418522"
REGION = "europe-west3"

# Bucket URI (scheme included) and the folder under it used as the pipeline root.
BUCKET_NAME = "gs://houseprice"
PIPELINE_ROOT = BUCKET_NAME + "/pipeline_root_houseprice/"

In [55]:
# Custom base image created using docker; the URI is assembled from the
# region, the project id, the "houseprice" repository and the image name.
# No tag is included in the URI.
IMAGE_NAME = "training"
BASE_IMAGE = "{}-docker.pkg.dev/{}/houseprice/{}".format(REGION, PROJECT_ID, IMAGE_NAME)


In [56]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="get_data.yaml",
)
def get_test_data(
    filepath: str,
    test_file: Output[Dataset],
):
    """Copy ``test.csv`` from ``filepath`` into the ``test_file`` output artifact.

    Args:
        filepath: Location that contains ``test.csv``; ``/test.csv`` is
            appended to it to form the path that is read.
        test_file: Output dataset artifact; the CSV is written to its
            ``path`` without the DataFrame index.
    """
    # pandas is imported inside the component body.
    import pandas as pd

    source_csv = filepath + "/test.csv"
    pd.read_csv(source_csv).to_csv(test_file.path, index=False)

  @component(
  def get_test_data(


In [57]:
@component(
    base_image=BASE_IMAGE,
    install_kfp_package=False,
    output_component_file="save_file.yaml",
)
def save_file(
        out_filepath: str,
        file: Input[Dataset]
):
    """Write the input dataset artifact to ``<out_filepath>/test.csv``.

    Args:
        out_filepath: Destination location; ``/test.csv`` is appended to it
            to form the path that is written.
        file: Input dataset artifact whose ``path`` is read as CSV.
    """
    # pandas is imported inside the component body.
    import pandas as pd

    test_df = pd.read_csv(file.path)

    # index=False keeps the saved file column-for-column identical to the
    # input artifact; without it pandas prepends the row index as an extra
    # unnamed column.
    test_df.to_csv(out_filepath + '/test.csv', index=False)


  @component(
  def save_file(


In [59]:
@dsl.pipeline(
    name="pipeline-houseprice",
    pipeline_root=PIPELINE_ROOT,
)
def pipeline(
    data_filepath: str = f"{BUCKET_NAME}/data",
    out_filepath: str = f"{BUCKET_NAME}/out",
):
    """Chain the two components: read ``test.csv`` and save it under ``out_filepath``.

    Args:
        data_filepath: Passed to ``get_test_data`` as ``filepath``.
        out_filepath: Passed to ``save_file`` as ``out_filepath``.
    """
    fetch_task = get_test_data(filepath=data_filepath)

    # The dataset artifact produced by the first step feeds the second step.
    save_file(
        out_filepath=out_filepath,
        file=fetch_task.outputs["test_file"],
    )

In [60]:
# Compile the pipeline function and write the result to ml_test.json.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="ml_test.json",
)

In [61]:
# Build the job from the compiled ml_test.json template, with caching disabled.
# NOTE(review): no project= is passed although PROJECT_ID is defined in this
# notebook — presumably the SDK's default project is intended; confirm.
start_pipeline = pipeline_jobs.PipelineJob(
    template_path="ml_test.json",
    display_name="houseprice-pipeline",
    location=REGION,
    enable_caching=False,
)

In [62]:
# Submit the job; the log below shows its state being reported until the run completes.
start_pipeline.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/502842367035/locations/europe-west3/pipelineJobs/pipeline-houseprice-20240328212516
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/502842367035/locations/europe-west3/pipelineJobs/pipeline-houseprice-20240328212516')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/europe-west3/pipelines/runs/pipeline-houseprice-20240328212516?project=502842367035
PipelineJob projects/502842367035/locations/europe-west3/pipelineJobs/pipeline-houseprice-20240328212516 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/502842367035/locations/europe-west3/pipelineJobs/pipeline-houseprice-20240328212516 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/502842367035/locations/europe-west3/pipelineJobs/pipeline-houseprice-20240328212516 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob run completed. Resource name: