We start by downloading a specific release of the components, because running from master is not a good way to build "repeatable" systems.

In [None]:
# Download the source tarball for the 0.2.5 release of kubeflow/pipelines.
!wget https://github.com/kubeflow/pipelines/archive/0.2.5.tar.gz

In [None]:
# Unpack the downloaded archive; the cells below read component specs from
# the "pipelines-0.2.5/" directory.
!tar -xvf 0.2.5.tar.gz

In [None]:
import kfp

In [None]:
#tag::loadGCSDLComponent[]
# Component for downloading from Google Cloud Storage, loaded from the
# component.yaml shipped in the unpacked 0.2.5 release tree.
gcs_download_component = kfp.components.load_component_from_file(
    "pipelines-0.2.5/components/google-cloud/storage/download/component.yaml"
)
#end::loadGCSDLComponent[]
#tag::loadTFDVAndFriendsComponents[]
# TFX components used by the data-validation pipeline, each loaded from its
# own component.yaml in the same release tree.
tfx_csv_gen = kfp.components.load_component_from_file(
    "pipelines-0.2.5/components/tfx/ExampleGen/CsvExampleGen/component.yaml"
)
tfx_statistic_gen = kfp.components.load_component_from_file(
    "pipelines-0.2.5/components/tfx/StatisticsGen/component.yaml"
)
tfx_schema_gen = kfp.components.load_component_from_file(
    "pipelines-0.2.5/components/tfx/SchemaGen/component.yaml"
)
tfx_example_validator = kfp.components.load_component_from_file(
    "pipelines-0.2.5/components/tfx/ExampleValidator/component.yaml"
)
#end::loadTFDVAndFriendsComponents[]

In [None]:
@kfp.dsl.pipeline(name='DL', description='Sample DL pipeline')
def pipeline_with_dl():
    # Single-step pipeline: invoke the GCS download component on one path.
    #tag::dlOp[]
    # Your path goes here
    source_path = "gs://ml-pipeline-playground/tensorflow-tfx-repo/tfx/components/testdata/external/csv"
    dl_op = gcs_download_component(gcs_path=source_path)
    #end::dlOp[]

In [None]:
# Compile the download pipeline function into the package 'dl_pipeline.zip'.
kfp.compiler.Compiler().compile(
    pipeline_with_dl,
    'dl_pipeline.zip',
)

In [None]:
# Client handle used by the cells below to create experiments and start runs.
# It is constructed with no arguments, so it relies on the library's default
# connection settings.
client = kfp.Client()

In [None]:
# Create the 'dl' experiment, then start a run named 'dl' from the compiled
# download pipeline package.
my_experiment = client.create_experiment(name='dl')
my_run = client.run_pipeline(
    my_experiment.id,
    'dl',
    'dl_pipeline.zip',
)

In [None]:
#tag::standaloneTFDVPipeline[]
@kfp.dsl.pipeline(name='TFDV', description='TF DV Pipeline')
def tfdv_pipeline():
    # Pipeline steps, in order: a busybox download step, CsvExampleGen,
    # StatisticsGen, SchemaGen, and finally ExampleValidator, which receives
    # both the statistics and the schema.

    # DL with wget, can use gcs instead as well
    download_script = (
        'sleep 1;'
        'mkdir -p /tmp/data;'
        'wget https://raw.githubusercontent.com/moorissa/medium/master/items-recommender/data/trx_data.csv -O /tmp/data/results.csv'
    )
    download_step = kfp.dsl.ContainerOp(
        name='download',
        image='busybox',
        command=['sh', '-c'],
        arguments=[download_script],
        file_outputs={'downloaded': '/tmp/data'},
    )
    # This expects a directory of inputs not just a single file
    examples = tfx_csv_gen(input_base=download_step.output)
    statistics = tfx_statistic_gen(input_data=examples.output)
    inferred_schema = tfx_schema_gen(statistics.output)
    tfx_example_validator(
        stats=statistics.outputs['output'],
        schema=inferred_schema.outputs['output'],
    )
#end::standaloneTFDVPipeline[]

In [None]:
# Compile the TFDV pipeline function into the package 'tfdv_pipeline.zip'.
kfp.compiler.Compiler().compile(
    tfdv_pipeline,
    'tfdv_pipeline.zip',
)

In [None]:
# Create the 'tfdv_pipeline' experiment, then start a run named 'tfdv' from
# the compiled TFDV pipeline package.
my_experiment = client.create_experiment(name='tfdv_pipeline')
my_run = client.run_pipeline(
    my_experiment.id,
    'tfdv',
    'tfdv_pipeline.zip',
)