In [96]:
import kfp
from kfp import dsl

In [97]:
def setup_volume_op():
    """Build the pipeline op that requests the dataset volume.

    Returns:
        The ``dsl.VolumeOp`` named "Creation of Volume", asking for a
        1Gi volume with resource name "dataset_pvc" in
        ``dsl.VOLUME_MODE_RWO`` mode.
    """
    # Collect the request in one mapping so the volume's settings can be
    # read (and tweaked) at a glance before being handed to kfp.
    volume_request = {
        "name": "Creation of Volume",
        "resource_name": "dataset_pvc",
        "modes": dsl.VOLUME_MODE_RWO,
        "size": "1Gi",
    }
    return dsl.VolumeOp(**volume_request)

In [98]:
def download_dataset_op(url, volume):
    """Build the step that downloads the dataset onto the shared volume.

    The step runs ``gsutil cat`` in the ``google/cloud-sdk:272.0.0`` image,
    writes the object to ``/mnt/dataset.csv`` and then prints the file to
    stdout so the data also shows up in the step's log.

    Args:
        url: Location of the object to fetch; passed to ``gsutil cat``.
        volume: Volume to mount at ``/mnt`` inside the container.

    Returns:
        The ``dsl.ContainerOp`` named "Download Dataset".
    """
    target_path = '/mnt/dataset.csv'
    # With `sh -c SCRIPT A B`, A becomes $0 and B becomes $1.
    # A `gsutil cat | tee` pipeline would exit with tee's status, so a failed
    # download would still be reported as success. Redirecting and chaining
    # with `&&` lets a gsutil failure fail the step. The parameters are
    # quoted so unusual characters in the URL/path are not split or globbed.
    fetch_script = 'gsutil cat "$0" > "$1" && cat "$1"'
    return dsl.ContainerOp(
        name='Download Dataset',
        image='google/cloud-sdk:272.0.0',
        command=['sh', '-c'],
        arguments=[fetch_script, url, target_path],
        pvolumes={"/mnt": volume},
    )

In [99]:
def echo_op(volume):
    """Build the step that prints the downloaded dataset.

    Args:
        volume: Volume to mount at ``/mnt`` inside the container; the step
            reads ``/mnt/dataset.csv`` from it.

    Returns:
        The ``dsl.ContainerOp`` named "echo", which runs
        ``cat /mnt/dataset.csv`` in the ``library/bash:4.4.23`` image.
    """
    shell_invocation = ['sh', '-c']
    print_script = ['cat /mnt/dataset.csv']
    mounts = {"/mnt": volume}
    return dsl.ContainerOp(
        name='echo',
        image='library/bash:4.4.23',
        command=shell_invocation,
        arguments=print_script,
        pvolumes=mounts,
    )

In [100]:
@dsl.pipeline(
    name='Sequential pipeline',
    description='A pipeline with two sequential steps.'
)
def sequential_pipeline(url='gs://ml_kube_bucket/kc_house_data.csv'):
    """A pipeline with two sequential steps."""
    # Request the volume that both container steps mount at /mnt.
    pvc_op = setup_volume_op()

    # First step: fetch `url` onto that volume.
    fetch_op = download_dataset_op(url, pvc_op.volume)

    # Second step: print the file. It receives the download step's `pvolume`
    # rather than the raw volume; presumably this is what orders it after the
    # download -- confirm against the kfp documentation.
    echo_op(fetch_op.pvolume)

In [101]:
if __name__ == '__main__':
    # Compile the pipeline definition into a package file, 'ml_kube.yaml',
    # written relative to the current working directory.
    pipeline_compiler = kfp.compiler.Compiler()
    pipeline_compiler.compile(sequential_pipeline, 'ml_kube.yaml')