<a href="https://colab.research.google.com/github/deltorobarba/machinelearning/blob/master/kfp_scikit_training_ai_platform.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Optional: uncomment the two lines below to upgrade the kfp package, then restart the kernel.
#KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.20/kfp.tar.gz'
#!pip3 install $KFP_PACKAGE --upgrade

### Load the pre-made component from the KFP GitHub repository
Here we load the existing Cloud AI Platform training component, which submits a model training job on GCP. You can find an [example](https://github.com/kubeflow/pipelines/tree/master/components/gcp/ml_engine/train) and the component itself in the KFP GitHub repository.



In [0]:
import kfp.components as comp

# Component spec URL; the path embeds a specific commit hash of kubeflow/pipelines.
MLENGINE_TRAIN_COMPONENT_URL = (
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    'a97f1d0ad0e7b92203f35c5b0b9af3a314952e05/'
    'components/gcp/ml_engine/train/component.yaml'
)

# Build a pipeline-op factory from the remote component definition.
mlengine_train_op = comp.load_component_from_url(MLENGINE_TRAIN_COMPONENT_URL)

# Print the generated signature and description of the loaded component.
help(mlengine_train_op)

Help on function Submitting a Cloud ML training job as a pipeline step:

Submitting a Cloud ML training job as a pipeline step(project_id:'GCPProjectID', python_module:'String'='', package_uris:'List'='', region:'GCPRegion'='', args:'List'='', job_dir:'GCSPath'='', python_version:'String'='', runtime_version:'String'='', master_image_uri:'GCRPath'='', worker_image_uri:'GCRPath'='', training_input:'Dict'='', job_id_prefix:'String'='', wait_interval:'Integer'='30')
    Submitting a Cloud ML training job as a pipeline step
    A Kubeflow Pipeline component to submit a Cloud Machine Learning (Cloud ML) 
    Engine training job as a step in a pipeline.



In [0]:
import kfp.dsl as dsl
import kfp.gcp as gcp
import json
@dsl.pipeline(
    name='CAIP training pipeline',
    description='CAIP training pipeline'
)
def pipeline(
    project_id = 'kfp-primer-workshop',
    python_module = 'trainer.task',
    # NOTE(review): the component declares package_uris as a List, and the
    # other list-typed argument (args) is JSON-encoded below; confirm that a
    # bare string is accepted here.
    package_uris = 'gs://kfp-scikit/model/zip/trainer.tar.gz',
    region = 'us-central1',
    args = json.dumps([
        '--pathdata', 'gs://erwinh-public-data/scikit/data/scikit_marketing-data.csv', 
        '--pathoutput', 'gs://kfp-scikit/model/output/', # change to your gcs bucket
        '--storage', 'BQ',
        '--bqtable', 'kfp-primer-workshop.marketing_data.raw' # change to your BQ table
    ]),
    job_dir = 'gs://kfp-scikit/tmp',  # change to your GCS bucket
    python_version = '3.5',
    runtime_version = '1.14',
    master_image_uri = '',
    worker_image_uri = '',
    training_input = '',
    job_id_prefix = 'marketing_model',
    wait_interval = '30'):
    """Single-step pipeline that submits a training job via ``mlengine_train_op``.

    Every parameter is forwarded unchanged, by the same name, to the
    ``mlengine_train_op`` component; run ``help(mlengine_train_op)`` for the
    component's declared parameter types. The defaults point at the workshop's
    project, buckets and BigQuery table and are meant to be replaced with
    your own.

    The step is configured with ``gcp.use_gcp_secret('user-gcp-sa')``.
    """
    # job_dir was previously declared as a pipeline parameter but never handed
    # to the component, so any value supplied for it was silently ignored.
    mlengine_train_op(
        project_id=project_id,
        python_module=python_module,
        package_uris=package_uris,
        region=region,
        args=args,
        job_dir=job_dir,
        python_version=python_version,
        runtime_version=runtime_version,
        master_image_uri=master_image_uri,
        worker_image_uri=worker_image_uri,
        training_input=training_input,
        job_id_prefix=job_id_prefix,
        wait_interval=wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa'))
    
    

In [0]:
import kfp.compiler as compiler

# The compiled package is named after the pipeline function, with a .zip suffix.
pipeline_func = pipeline
pipeline_filename = '{}.zip'.format(pipeline_func.__name__)

# Compile the pipeline function into the package file.
compiler.Compiler().compile(pipeline_func, pipeline_filename)

In [0]:
# The experiment name is personalised by appending the user's name.
YOUR_NAME = 'Erwin'  # change to your name
EXPERIMENT_NAME = 'Scikit Learn CAIP V1.1 {}'.format(YOUR_NAME)  # change to your experiment name

In [0]:
import kfp

# Pipeline argument values to pass with the run (none specified here).
arguments = {}

# Connect to the Kubeflow Pipelines API and set up the experiment for this run.
client = kfp.Client()
experiment = client.create_experiment(EXPERIMENT_NAME)

# Submit the compiled package as a run named after the pipeline function.
run_name = '{} run'.format(pipeline_func.__name__)
run_result = client.run_pipeline(
    experiment_id=experiment.id,
    job_name=run_name,
    pipeline_package_path=pipeline_filename,
    params=arguments,
)