In [None]:
# collect_data_pipeline.ipynb
import kfp
from kfp import dsl
from kfp.components import func_to_container_op
from typing import NamedTuple


def collect_data_component(gen_count: int = 100):
    """Generate rotated Fashion-MNIST images and save them as PNG files.

    A random, contiguous chunk of ``gen_count`` training images is selected,
    each image is passed through an ``ImageDataGenerator`` configured with
    ``rotation_range=180``, colour-inverted, and written to
    ``/notebook/new_dataset/raw`` as ``<label>-<timestamp><index>.png``.

    All imports and the helper are kept inside the function body because the
    function is handed to ``func_to_container_op`` and therefore has to be
    self-contained.

    Args:
        gen_count: Number of images to generate. Must be between 1 and the
            number of training images.

    Raises:
        ValueError: If ``gen_count`` is outside the valid range.
    """
    import datetime
    import os
    from random import randint
    import tensorflow as tf
    from PIL import Image, ImageOps
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    def save_image(filename, data_array):
        """Write a 2-D array to ``filename`` as a colour-inverted image."""
        im = Image.fromarray(data_array.astype('uint8'))
        im_invert = ImageOps.invert(im)
        im_invert.save(filename)

    img_rows, img_cols = 28, 28

    mnist = tf.keras.datasets.fashion_mnist
    # Only the training split is used; the test split is discarded.
    (x_train, y_train), _ = mnist.load_data()

    train_size = len(x_train)
    if not 1 <= gen_count <= train_size:
        raise ValueError(
            f"gen_count must be between 1 and {train_size}, got {gen_count}")

    # ImageDataGenerator.flow expects a rank-4 array, so add a channel axis.
    x_train_reshaped = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)

    datagen = ImageDataGenerator(rotation_range=180)

    # Floor division keeps the bound an int: randint rejects non-integer
    # bounds, which is what true division yields whenever gen_count does not
    # divide the dataset size evenly.
    chunk_count = train_size // gen_count
    pick_index = randint(1, chunk_count)
    start = (pick_index - 1) * gen_count
    end = pick_index * gen_count
    rotated_train = datagen.flow(x_train_reshaped[start:end],
                                 y_train[start:end],
                                 batch_size=1)
    raw_path = "/notebook/new_dataset/raw"
    date_postfix = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    os.makedirs(raw_path, exist_ok=True)
    # The flow iterator never stops on its own, so bound the loop by its
    # length (one batch per selected image because batch_size=1).
    for i in range(len(rotated_train)):
        images, labels = next(rotated_train)
        filename = f"{raw_path}/{labels[0]}-{date_postfix}{i}.png"
        save_image(filename, images.reshape(img_rows, img_cols))



In [None]:
# collect_data_pipeline.ipynb
def collect_data_pipeline(gen_count: int):
    """Single-step pipeline that runs ``collect_data_component``.

    The step is built from ``collect_data_component`` on the
    ``dudaji/cap-jupyterlab:tf2.0-cpu`` image, with the ``workspace-handson``
    PVC mounted at ``/notebook``.

    Args:
        gen_count: Forwarded to ``collect_data_component``.
    """
    component_factory = func_to_container_op(
        collect_data_component,
        base_image="dudaji/cap-jupyterlab:tf2.0-cpu",
    )

    workspace_volume = dsl.PipelineVolume(pvc="workspace-handson")
    mounts = {"/notebook": workspace_volume}

    task = component_factory(gen_count).add_pvolumes(pvolumes=mounts)

    # A zero-duration max staleness ("P0D"): presumably means cached results
    # are never reused for this step — confirm against the KFP caching docs.
    caching = task.execution_options.caching_strategy
    caching.max_cache_staleness = "P0D"
    
# Submit one run of the pipeline under the "collect_raw_data" experiment.
arguments = dict(gen_count=100)

client = kfp.Client()
client.create_run_from_pipeline_func(
    collect_data_pipeline,
    arguments=arguments,
    experiment_name="collect_raw_data",
)

In [None]:
# collect_data_pipeline.ipynb
# Compile the pipeline function into a package file on disk ...
kfp.compiler.Compiler().compile(
    pipeline_func=collect_data_pipeline,
    package_path="collect_data_pipeline.yaml",
)

# ... then upload that package through the existing `client`.
client.upload_pipeline(
    pipeline_package_path="collect_data_pipeline.yaml",
    pipeline_name="collect_data_pipeline_test",
    description="Collect new image",
)