In [156]:
import kfp
from kfp import dsl
from kubernetes.client.models import V1EnvVar

In [157]:
def setup_volume_op():
    """Build the ``dsl.VolumeOp`` that provides storage for the pipeline.

    Returns:
        A ``dsl.VolumeOp`` requesting a ``1Gi`` volume with the
        ``dsl.VOLUME_MODE_RWO`` access mode.
    """
    volume_settings = {
        "name": "Creation of Volume",
        "resource_name": "dataset_pvc",
        "modes": dsl.VOLUME_MODE_RWO,
        "size": "1Gi",
    }
    return dsl.VolumeOp(**volume_settings)

In [158]:
def download_dataset_op(url, volume, data_path):
    """Build the step that streams the dataset at ``url`` into ``data_path``.

    Args:
        url: Source location handed to ``gsutil cat``.
        volume: Volume mounted at ``/mnt`` inside the step's container.
        data_path: File that ``tee`` writes the downloaded bytes to.

    Returns:
        The ``dsl.ContainerOp`` describing the download step.
    """
    return dsl.ContainerOp(
        name='Download Dataset',
        image='google/cloud-sdk:272.0.0',
        command=['sh', '-c'],
        # With `sh -c`, the arguments after the script are bound to $0 and $1.
        # Both expansions are double-quoted so that a URL or path containing
        # whitespace or glob characters is passed through as a single word
        # instead of being split or expanded by the shell.
        # Note: `tee` also copies the stream to the step's stdout.
        arguments=['gsutil cat "$0" | tee "$1"', url, data_path],
        pvolumes={"/mnt": volume}
    )

In [159]:
def train_op(volume, trained_path, data_path):
    """Build the training step, which runs ``python3 train.py``.

    Args:
        volume: Volume mounted at ``/mnt`` inside the step's container.
        trained_path: Value exported to the container as ``TRAINED_MODEL_PATH``.
        data_path: Value exported to the container as ``DATA_PATH``.

    Returns:
        The ``dsl.ContainerOp`` for the training step, with both environment
        variables attached to its container.
    """
    training_step = dsl.ContainerOp(
        name='Train ML',
        image='davidzeng/ml_kube',
        command=['sh', '-c'],
        arguments=['python3 train.py'],
        pvolumes={'/mnt': volume},
    )

    # Environment variables are added in this fixed order.
    env_settings = (
        ('TRAINED_MODEL_PATH', trained_path),
        ('DATA_PATH', data_path),
    )
    for env_name, env_value in env_settings:
        training_step.container.add_env_variable(V1EnvVar(env_name, env_value))

    return training_step

In [160]:
def check_op(volume, trained_path):
    """Build the step that runs ``wc`` on the trained model file.

    Args:
        volume: Volume mounted at ``/mnt`` inside the step's container.
        trained_path: Path of the file passed to ``wc``.

    Returns:
        The ``dsl.ContainerOp`` describing the check step.
    """
    return dsl.ContainerOp(
        name='Check Trained Model',
        image='library/bash:4.4.23',
        command=['sh', '-c'],
        # With `sh -c`, the argument after the script is bound to $0. It is
        # double-quoted so a path containing whitespace or glob characters
        # reaches `wc` as a single word.
        arguments=['wc "$0"', trained_path],
        pvolumes={"/mnt": volume}
    )

In [161]:
@dsl.pipeline(
    name='Sequential pipeline',
    description='A pipeline that creates a volume, downloads a dataset, '
                'trains a model and checks the trained model.'
)
def sequential_pipeline(url='gs://ml_kube_bucket/kc_house_data.csv',
                        trained_path='/mnt/trained_model',
                        data_path='/mnt/dataset.csv'):
    """Create a volume, download the dataset, train a model and check it.

    Each step after the volume creation receives the volume object produced
    by the step before it (``.volume`` / ``.pvolume``).

    Args:
        url: Location of the dataset to download.
        trained_path: Path given to the training and check steps for the
            trained model.
        data_path: Path the dataset is written to and that the training step
            receives.
    """
    volume_task = setup_volume_op()
    dataset_task = download_dataset_op(url, volume_task.volume, data_path)
    train_task = train_op(dataset_task.pvolume, trained_path, data_path)
    # Last step: its return value is not used by anything else in this function.
    check_op(train_task.pvolume, trained_path)

In [162]:
# Compile the pipeline definition to 'ml_kube.yaml' only when this file is
# executed as the main module.
if __name__ == '__main__':
    pipeline_compiler = kfp.compiler.Compiler()
    pipeline_compiler.compile(sequential_pipeline, 'ml_kube.yaml')