In [None]:
import random
import kfp
import kfp_server_api
import os
import string
import time
from google.cloud import storage
from kfp.components import create_component_from_func
from datetime import datetime, timezone, timedelta

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# CHANGE necessary parameters here
# KFP endpoint that the client connects to
host = 'http://127.0.0.1:3000'
# how many runs to create while measuring latency
num_runs = 50

# Compiled pipeline package; its stem is embedded in the generated names.
file_name = 'pipeline_10_nodes.yaml'
pipeline_name_prefix = f"pipeline_{os.path.splitext(file_name)[0]}_"
experiment_name_prefix = 'experiment_for_runs_of_'
run_name_prefix = 'run_of_'
# NOTE(review): not referenced in the visible code; presumably a GCS bucket
# used elsewhere in the notebook -- confirm.
bucket_name = ''

def random_suffix() -> str:
    """Return a random 10-character suffix of lowercase letters and digits.

    Used to make pipeline, experiment and run names unique across invocations.
    """
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choices(alphabet, k=10))


# Component that adds two floats; it is the only node type used by
# generated_pipeline below.
# NOTE(review): documented with comments above the decorator rather than a
# docstring, since create_component_from_func presumably derives component
# metadata from the function itself -- confirm before adding one.
@create_component_from_func
def add_op(a: float, b: float) -> float:
    return a + b


def generated_pipeline():
    """Build a pipeline whose add_op tasks are wired together at random.

    Starts from one seed task with constant inputs, then adds ten more
    tasks, each consuming the outputs of two randomly chosen earlier tasks.
    """
    nodes = [add_op(3, 5)]
    for _ in range(10):
        # Both inputs are drawn independently, so they may be the same task.
        left = random.choice(nodes).output
        right = random.choice(nodes).output
        nodes.append(add_op(left, right))


if __name__ == '__main__':
    # Compile the generated pipeline to a local package and upload it under a
    # unique name so repeated benchmark invocations do not collide.
    kfp.compiler.Compiler().compile(generated_pipeline, file_name)
    client = kfp.Client(host)
    pipeline_file = file_name

    pipeline_name = pipeline_name_prefix + random_suffix()
    pipeline = client.pipeline_uploads.upload_pipeline(pipeline_file, name=pipeline_name)
    default_version_id = pipeline.default_version.id

    # All runs are grouped under one freshly created experiment.
    experiment_name = experiment_name_prefix + pipeline_name
    experiment = client.experiments.create_experiment(body={'name': experiment_name})
    experiment_id = experiment.id

    # Every run is owned by the same experiment and created from the same
    # pipeline version, so the references are built once, outside the loop.
    models = kfp_server_api.models
    resource_references = [
        models.ApiResourceReference(
            key=models.ApiResourceKey(
                id=experiment_id, type=models.ApiResourceType.EXPERIMENT),
            relationship=models.ApiRelationship.OWNER,
        ),
        models.ApiResourceReference(
            key=models.ApiResourceKey(
                id=default_version_id, type=models.ApiResourceType.PIPELINE_VERSION),
            relationship=models.ApiRelationship.CREATOR,
        ),
    ]

    # Measure create run latency
    create_run_latencies = []
    for _ in range(num_runs):
        # Prepare the request before starting the clock so that only the
        # create_run call itself is timed.
        run_name = run_name_prefix + pipeline_name + '_' + random_suffix()
        run_body = {'name': run_name, 'resource_references': resource_references}

        start = time.perf_counter()
        # The returned run is not needed; discarding it keeps `pipeline`
        # bound to the uploaded pipeline.
        client.runs.create_run(body=run_body)
        create_run_latencies.append(time.perf_counter() - start)

    # Plot
    # NOTE(review): distplot is deprecated in newer seaborn releases; consider
    # histplot/displot once the minimum seaborn version is confirmed.
    sns.distplot(a=create_run_latencies)