In [31]:
!pip install kfp==1.8.9
!pip install google-cloud-pipeline-components==0.2.0



In [None]:
import kfp
from kfp import components
import kfp.dsl as dsl
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient
from google_cloud_pipeline_components import aiplatform as gcc_aip

from components.preprocessing import split_data
from components.training import xgb_training
import pipeline.pipeline 

In [28]:
BUCKET_NAME = "gs://feature-store-mars21"

@dsl.pipeline(
  name='sklearn-pipeline',
  description='pipeline training an sklearn model',
  pipeline_root=BUCKET_NAME+"/sklearn-pipeline"
)
def pipeline(
    data_path: str,
    project_id: str,
    bucket_name: str=BUCKET_NAME+"/xgb-pl",
    endpoint_name: str = 'sklearn-ep'
):
    
    prepro_op = split_data(data_path)
    
    
    train_op = xgb_training(
        prepro_op.outputs['data_out_x_train'],
        prepro_op.outputs['data_out_y_train'],
        prepro_op.outputs['data_out_x_test'],
        prepro_op.outputs['data_out_y_test']
    )
    train_op.set_cpu_limit('4')
    train_op.set_memory_limit('14Gi')
    #train_op.add_node_selector_constraint('cloud.google.com/gke-accelerator', 'nvidia-tesla-k80')
    #train_op.set_gpu_limit(1)
    
    ### Create endpoint
    endpoint_create_op = gcc_aip.EndpointCreateOp(
        project=project_id,
        display_name=endpoint_name
    ).after(train_op)
    
    
    ### Use predefined component to upload model
    model_upload_op = gcc_aip.ModelUploadOp(
        project=project_id,
        display_name='modelxgb',
        artifact_uri=train_op.outputs["path"],
        serving_container_image_uri='us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-4:latest'
    ).after(train_op)
    
    
    #deploy_op = gcc_aip.ModelDeployOp(  
    #    model=train_op.outputs["model"],
    #)
    
    custom_model_deploy_op = gcc_aip.ModelDeployOp(
       endpoint=endpoint_create_op.outputs["endpoint"],
        model=model_upload_op.outputs["model"],
        dedicated_resources_machine_type="n1-standard-4",
        dedicated_resources_min_replica_count=1
    )

In [29]:
compiler.Compiler().compile(
    pipeline_func=pipeline.pipeline,
    package_path="pl.json"
)


TypeError: split_data() missing 4 required positional arguments: 'data_out_x_test', 'data_out_y_train', 'data_out_y_test', and 'data_path'

In [12]:
from google.cloud.aiplatform.pipeline_jobs import PipelineJob

pl = PipelineJob(display_name= 'xgb-job',
        template_path= "pl.json",
        location='us-central1',
        parameter_values={'project_id': 'feature-store-mars21', 
                          'data_path': 'gs://mortgage_dataset_files/mortgage-small.csv'})

pl.run(sync=False)

FileNotFoundError: [Errno 2] No such file or directory: 'pl.json'

In [29]:
!pip list

Package                          Version
-------------------------------- -----------
absl-py                          0.11.0
aiohttp                          3.7.4.post0
ansiwrap                         0.8.4
anyio                            3.3.0
appdirs                          1.4.4
argcomplete                      1.12.3
argon2-cffi                      20.1.0
arrow                            1.1.1
asn1crypto                       1.4.0
async-generator                  1.10
async-timeout                    3.0.1
attrs                            21.2.0
backcall                         0.2.0
backports.functools-lru-cache    1.6.4
beatrix-jupyterlab               0.9.1
binaryornot                      0.4.4
black                            21.8b0
bleach                           4.1.0
blinker                          1.4
Bottleneck                       1.3.2
brotlipy                         0.7.0
cachetools                       4.2.4
caip-notebooks-serverextension   1.0.0
certifi  

In [21]:
type({'project_id': 'feature-store-mars21', 
                          'data_path': 'gs://mortgage_dataset_files/mortgage-small.csv'})

dict

In [24]:
d = "{'project_id': 'feature-store-mars21', 'data_path': 'gs://mortgage_dataset_files/mortgage-small.csv'}"

In [25]:
import ast
ast.literal_eval(d)

{'project_id': 'feature-store-mars21',
 'data_path': 'gs://mortgage_dataset_files/mortgage-small.csv'}