In [None]:
from kfp.v2 import dsl
from kfp.v2.dsl import (Artifact,
                        Dataset,
                        Input,
                        Output,
                        Model,
                        Metrics,
                        Markdown,
                        HTML,
                        component, 
                        OutputPath, 
                        InputPath)

from kfp.v2 import compiler
from google.cloud import aiplatform as vertex_
from google.cloud.aiplatform import pipeline_jobs

from datetime import datetime
import pandas as pd

In [None]:
# Settings shared by every cell below: target project, region, and the
# bucket-derived locations used for pipeline data and artifacts.
PROJECT_ID = "test-house"
REGION = "europe-west3"
BUCKET_NAME = f"gs://{PROJECT_ID}-houseprice"
PIPELINE_ROOT = BUCKET_NAME + "/pipeline_root_houseprice/"

In [None]:
# Custom base image (built with Docker) that the pipeline components run in.
# The URI is assembled as <region>-docker.pkg.dev/<project>/houseprice/<image>.
IMAGE_NAME = "training"
BASE_IMAGE = "/".join(
    [f"{REGION}-docker.pkg.dev", PROJECT_ID, "houseprice", IMAGE_NAME]
)

In [None]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="get_data.yaml",
)
def get_test_data(
    filepath: str,
    test_file: Output[Dataset],
):
    """Copy ``test.csv`` from ``filepath`` into the ``test_file`` artifact.

    Args:
        filepath: Location that contains ``test.csv``; ``'/test.csv'`` is
            appended to it verbatim.
        test_file: Output dataset artifact. The CSV is written to its
            ``path`` without the DataFrame index column.
    """
    # Imported inside the function so the dependency travels with the
    # component body rather than relying on notebook-level imports.
    import pandas as pd

    source_csv = filepath + '/test.csv'
    pd.read_csv(source_csv).to_csv(test_file.path, index=False)

In [None]:
@component(
    base_image=BASE_IMAGE,
    install_kfp_package=False,
    output_component_file="save_file.yaml",
)
def save_file(
        out_filepath: str,
        file: Input[Dataset]
):
    """Write the incoming dataset artifact to ``<out_filepath>/test.csv``.

    Args:
        out_filepath: Destination location; ``'/test.csv'`` is appended to
            it verbatim.
        file: Input dataset artifact whose ``path`` points at a CSV file.
    """
    # Imported inside the function so the dependency travels with the
    # component body rather than relying on notebook-level imports.
    import pandas as pd

    test_df = pd.read_csv(file.path)

    # index=False keeps the saved file column-for-column identical to the
    # input; without it pandas prepends the RangeIndex as an unnamed column.
    test_df.to_csv(out_filepath + '/test.csv', index=False)


In [None]:
# USE TIMESTAMP TO DEFINE UNIQUE PIPELINE NAMES
# TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
# DISPLAY_NAME = 'pipeline-houseprice-job{}'.format(TIMESTAMP)

In [None]:
@dsl.pipeline(
    pipeline_root=PIPELINE_ROOT,
    # A name for the pipeline. Use to determine the pipeline Context.
    name="pipeline-houseprice",
)
def pipeline(
    data_filepath: str = f"{BUCKET_NAME}/data",
    out_filepath: str = f"{BUCKET_NAME}/out",
    # project: str = PROJECT_ID,
    # region: str = REGION,
    # display_name: str = DISPLAY_NAME,
):
    """Read ``test.csv`` from ``data_filepath`` and save it under ``out_filepath``.

    Args:
        data_filepath: Location passed to ``get_test_data`` as ``filepath``.
        out_filepath: Location passed to ``save_file`` as ``out_filepath``.
    """
    # Components are called with keyword arguments so each value is bound to
    # the intended input by name.
    data_op = get_test_data(filepath=data_filepath)

    # A task's artifacts are exposed through its ``outputs`` mapping; the
    # task object itself is not subscriptable.
    save_op = save_file(
        out_filepath=out_filepath,
        file=data_op.outputs["test_file"],
    )

In [None]:
# Compile the pipeline function into the job spec file that the
# PipelineJob below loads via template_path.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path='ml_test.json',
)

In [None]:
# Build the pipeline job from the compiled spec. Caching is disabled via
# enable_caching=False.
start_pipeline = pipeline_jobs.PipelineJob(
    display_name="houseprice-pipeline",
    template_path="ml_test.json",
    enable_caching=False,
    # Pin the job to the project that owns the bucket and base image instead
    # of relying on whatever default project the environment resolves to.
    project=PROJECT_ID,
    location=REGION,
)

In [None]:
# Submit the job configured above and start the pipeline run.
start_pipeline.run()