In [1]:
import os
import shutil


def create_model_files(k):
    """Write the Seldon ``Model`` manifest for the ``add10-<k>`` model.

    The manifest is written to ``./models/add10-<k>.yaml`` and points at the
    shared add10 artifact in the ``seldon-models`` bucket. An existing file at
    that path is left untouched.

    Args:
        k: Index used in both the model name and the file name.
    """
    model_yaml_path = f"./models/add10-{k}.yaml"
    if os.path.exists(model_yaml_path):
        return

    # Make sure the target directory exists so a fresh checkout does not fail.
    os.makedirs(os.path.dirname(model_yaml_path), exist_ok=True)

    manifest = (
        "apiVersion: mlops.seldon.io/v1alpha1\n"
        "kind: Model\n"
        "metadata:\n"
        f"  name: add10-{k}\n"
        "spec:\n"
        '  storageUri: "gs://seldon-models/scv2/examples/latency-tests/mlserver/add10"\n'
        "  requirements:\n"
        "  - mlserver\n"
        "  - python\n"
        "        "
    )
    with open(model_yaml_path, "w") as model_yaml:
        model_yaml.write(manifest)


def remove_model_files(k):
    """Delete the on-disk artifacts of model ``add10-<k>``.

    Removes the ``./models/add10-<k>`` directory tree and the
    ``./models/add10-<k>.yaml`` manifest; whichever of the two is absent is
    simply skipped.

    Args:
        k: Index of the model whose files should be removed.
    """
    model_dir = f"./models/add10-{k}"
    manifest = f"./models/add10-{k}.yaml"

    if os.path.exists(model_dir):
        shutil.rmtree(model_dir)

    if os.path.exists(manifest):
        os.remove(manifest)


def create_pipeline_file(k):
    """Write the Seldon ``Pipeline`` manifest that chains ``k`` add10 models.

    The pipeline is named ``pipeline-add10-<k>`` and is written to
    ``./pipelines/add10-<k>.yaml``. Step ``add10-0`` takes the pipeline input;
    every later step ``add10-<i>`` maps the ``sum`` output of ``add10-<i-1>``
    onto its ``INPUT`` tensor. The pipeline output is the last step,
    ``add10-<k-1>``. An existing file at that path is left untouched.

    Args:
        k: Number of chained steps; must be at least 1.

    Raises:
        ValueError: If ``k`` is less than 1 (the output would otherwise
            reference a step that does not exist).
    """
    if k < 1:
        raise ValueError(f"A pipeline needs at least one step, got k={k}")

    pipeline_yaml_path = f"./pipelines/add10-{k}.yaml"
    if os.path.exists(pipeline_yaml_path):
        return

    lines = [
        "apiVersion: mlops.seldon.io/v1alpha1",
        "kind: Pipeline",
        "metadata:",
        f"  name: pipeline-add10-{k}",
        "spec:",
        "  steps:",
        "    - name: add10-0",
    ]
    for i in range(1, k):
        lines += [
            f"    - name: add10-{i}",
            "      inputs:",
            f"        - add10-{i - 1}",
            "      tensorMap:",
            f"        add10-{i - 1}.outputs.sum: INPUT",
        ]
    lines += [
        "  output:",
        "    steps:",
        f"    - add10-{k - 1}",
    ]

    # Make sure the target directory exists so a fresh checkout does not fail.
    os.makedirs(os.path.dirname(pipeline_yaml_path), exist_ok=True)
    with open(pipeline_yaml_path, "w") as pipeline_yaml:
        pipeline_yaml.write("\n".join(lines) + "\n")


def remove_pipeline_file(k):
    """Delete ``./pipelines/add10-<k>.yaml`` if it is present.

    Args:
        k: Hop count that identifies the pipeline manifest.
    """
    target = f"./pipelines/add10-{k}.yaml"
    if not os.path.exists(target):
        return
    os.remove(target)

In [2]:
import time
import json

def many_models_experiment(n_hops=1, n_repeats=1):
    for i in range(n_hops):
        create_model_files(i)

    create_pipeline_file(n_hops)

    for i in range(n_hops):
        !seldon model load -f ./models/add10-{i}.yaml

    !seldon pipeline load -f ./pipelines/add10-{n_hops}.yaml

    time.sleep(1)

    input_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]
    request_string = '{"model_name":"add10", "inputs":[{"name":"INPUT","contents":{"int_contents":' + str(input_list) + '},"datatype":"INT32","shape":[1,' + str(len(input_list)) + ']}]}'
    expected_output_list = [x + 10 * n_hops for x in input_list]

    print(f"Starting anti cold start call...", end='')
    !seldon pipeline infer 'pipeline-add10-{n_hops}' --inference-mode grpc '{request_string}'
    print(" Done!")

    times = []
    try:
        for i in range(n_repeats):
            print(f"Starting call {i}...", end='')
            start = time.time_ns()
            output = !seldon pipeline infer 'pipeline-add10-{n_hops}' --inference-mode grpc '{request_string}'
            end = time.time_ns()
            print(" Done!")

            # validate output
            response_json = json.loads(output[0])
            output_list = response_json["outputs"][0]["contents"]["fp64Contents"]
            if not expected_output_list == output_list:
                raise ValueError(f"Expected {expected_output_list} but got {output}")

            times.append(end-start)

        return times
    except Exception as e:
        print(e)
        raise
    finally:
        # pass
        for i in range(n_hops):
            !seldon model unload add10-{i}

        !seldon pipeline unload pipeline-add10-{n_hops}


        for i in range (n_hops):
            remove_model_files(i)
        remove_pipeline_file(n_hops)    


def single_model_experiment(n_hops=1, n_repeats=1):
    !seldon model load -f ./models/add10.yaml

    if n_hops not in [1, 2, 5, 10, 20, 40, 60, 80]:
        raise ValueError(f"No defined test caller for {n_hops} hops")

    !seldon model load -f ./models/test-caller{n_hops}.yaml

    !seldon pipeline load -f ./pipelines/latency-test.yaml

    time.sleep(1)

    input_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]
    request_string = '{"model_name":"test_caller_1", "inputs":[{"name":"INPUT0","contents":{"int_contents":' + str(input_list) + '},"datatype":"INT32","shape":[1,' + str(len(input_list)) + ']}]}'
    expected_output_list = [x + 10 * n_hops for x in input_list]

    times = []
    try:
        for i in range(n_repeats):
            print(f"Starting call {i}...", end='')
            start = time.time_ns()
            output = !seldon pipeline infer latency-test --inference-mode grpc '{request_string}'
            end = time.time_ns()
            print(" Done!")

            # validate output
            # print(output)
            response_json = json.loads(output[0])
            output_list = response_json["outputs"][0]["contents"]["fp64Contents"]
            if not expected_output_list == output_list:
                raise ValueError(f"Expected {expected_output_list} but got {output}")

            times.append(end-start)
    
        return times
    except Exception as e:
        print(e)
        raise
    finally:
        !seldon pipeline unload latency-test

        !seldon model unload test-caller

        !seldon model unload add10

In [3]:
import numpy as np

def print_stats(times_list):
    """Print summary statistics for a sequence of durations.

    The input values are divided by 1,000,000 before printing and labelled
    ``ms``; each statistic goes on its own line with two decimals, in the
    order Min, Max, Mean, Median, P90, P95, P99.

    Args:
        times_list: Sequence of numeric durations (nanoseconds when produced
            by the experiment functions in this notebook).
    """
    ns_per_ms = 1000000.0
    samples = np.array(times_list)

    basic_reducers = (
        ("Min", min),
        ("Max", max),
        ("Mean", np.mean),
        ("Median", np.median),
    )
    for label, reducer in basic_reducers:
        print(f"{label}: {reducer(samples)/ns_per_ms:.2f}ms")

    for quantile in (90, 95, 99):
        print(f"P{quantile}: {np.percentile(samples, quantile)/ns_per_ms:.2f}ms")


In [4]:
# 1 hop, 20 timed calls per variant.
many_models_times_1 = many_models_experiment(n_hops=1, n_repeats=20)
single_model_times_1 = single_model_experiment(n_hops=1, n_repeats=20)

{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Don

In [5]:
# Latency summary for the 1-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_1)
print("\nNo hops")
print_stats(single_model_times_1)

Many hops
Min: 25.16ms
Max: 45.96ms
Mean: 28.00ms
Median: 26.95ms
P90: 29.01ms
P95: 30.21ms
P99: 42.81ms

No hops
Min: 29.14ms
Max: 1420.53ms
Mean: 99.90ms
Median: 30.02ms
P90: 32.43ms
P95: 102.78ms
P99: 1156.98ms


In [6]:
# 2 hops, 20 timed calls per variant.
many_models_times_2 = many_models_experiment(n_hops=2, n_repeats=20)
single_model_times_2 = single_model_experiment(n_hops=2, n_repeats=20)

{}
{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... 

In [7]:
# Latency summary for the 2-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_2)
print("\nNo hops")
print_stats(single_model_times_2)

Many hops
Min: 30.44ms
Max: 54.70ms
Mean: 33.50ms
Median: 31.91ms
P90: 36.54ms
P95: 37.89ms
P99: 51.34ms

No hops
Min: 30.67ms
Max: 45.40ms
Mean: 34.06ms
Median: 33.36ms
P90: 36.44ms
P95: 39.75ms
P99: 44.27ms


In [8]:
# 5 hops, 20 timed calls per variant.
many_models_times_5 = many_models_experiment(n_hops=5, n_repeats=20)
single_model_times_5 = single_model_experiment(n_hops=5, n_repeats=20)

{}
{}
{}
{}
{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting c

In [9]:
# Latency summary for the 5-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_5)
print("\nNo hops")
print_stats(single_model_times_5)

Many hops
Min: 40.35ms
Max: 64.01ms
Mean: 46.15ms
Median: 43.63ms
P90: 54.86ms
P95: 58.84ms
P99: 62.97ms

No hops
Min: 37.24ms
Max: 68.14ms
Mean: 40.60ms
Median: 38.37ms
P90: 43.89ms
P95: 47.87ms
P99: 64.08ms


In [10]:
# 10 hops, 20 timed calls per variant.
many_models_times_10 = many_models_experiment(n_hops=10, n_repeats=20)
single_model_times_10 = single_model_experiment(n_hops=10, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Star

In [11]:
# Latency summary for the 10-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_10)
print("\nNo hops")
print_stats(single_model_times_10)

Many hops
Min: 56.92ms
Max: 67.44ms
Mean: 62.62ms
Median: 62.75ms
P90: 66.95ms
P95: 67.00ms
P99: 67.35ms

No hops
Min: 49.21ms
Max: 62.62ms
Mean: 51.88ms
Median: 50.63ms
P90: 55.76ms
P95: 57.53ms
P99: 61.60ms


In [12]:
# 20 hops, 20 timed calls per variant.
many_models_times_20 = many_models_experiment(n_hops=20, n_repeats=20)
single_model_times_20 = single_model_experiment(n_hops=20, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!

In [13]:
# Latency summary for the 20-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_20)
print("\nNo hops")
print_stats(single_model_times_20)

Many hops
Min: 88.86ms
Max: 116.17ms
Mean: 101.58ms
Median: 99.93ms
P90: 111.73ms
P95: 114.35ms
P99: 115.81ms

No hops
Min: 73.18ms
Max: 79.78ms
Mean: 75.92ms
Median: 75.61ms
P90: 77.95ms
P95: 78.33ms
P99: 79.49ms


In [14]:
# 40 hops, 20 timed calls per variant.
many_models_times_40 = many_models_experiment(n_hops=40, n_repeats=20)
single_model_times_40 = single_model_experiment(n_hops=40, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 

In [15]:
# Latency summary for the 40-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_40)
print("\nNo hops")
print_stats(single_model_times_40)

Many hops
Min: 158.74ms
Max: 216.67ms
Mean: 177.18ms
Median: 176.81ms
P90: 194.34ms
P95: 201.13ms
P99: 213.56ms

No hops
Min: 123.00ms
Max: 144.07ms
Mean: 126.08ms
Median: 124.36ms
P90: 128.36ms
P95: 130.99ms
P99: 141.45ms


In [16]:
# 60 hops, 20 timed calls per variant.
many_models_times_60 = many_models_experiment(n_hops=60, n_repeats=20)
single_model_times_60 = single_model_experiment(n_hops=60, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Star

In [17]:
# Latency summary for the 60-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_60)
print("\nNo hops")
print_stats(single_model_times_60)

Many hops
Min: 227.11ms
Max: 285.58ms
Mean: 255.10ms
Median: 254.70ms
P90: 274.74ms
P95: 281.56ms
P99: 284.78ms

No hops
Min: 171.30ms
Max: 177.99ms
Mean: 173.87ms
Median: 173.26ms
P90: 176.86ms
P95: 177.51ms
P99: 177.89ms


In [18]:
# 80 hops, 20 timed calls per variant.
many_models_times_80 = many_models_experiment(n_hops=80, n_repeats=20)
single_model_times_80 = single_model_experiment(n_hops=80, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting anti cold start call...{"outputs":[{"name":"sum","datatype":"FP64","shape":["1","16"],"parameters":{"content_type":{"stringParam":"np"}},"contents":{"fp64Contents":[801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816]}}]}
 Done!
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!
Starting call 3... Done!
Starting call 4... Done!
Starting call 5... Done!
Starting call 6... Done!
Starting call 7... Done!
Starting call 8... Done!
Starting call 9... Done!
Starting call 10... Done!
Starting call 11... Done!
Starting call 12... Done!
Starting call 13... Done!
Starting call 14... Done!
Starting call 15... Done!
Starting call 16... Done!
Starting call 17... Done!
Starting call 18... Done!
Starting call 19... Done!


In [19]:
# Latency summary for the 80-hop run: chained models first, then the single-model run.
print("Many hops")
print_stats(many_models_times_80)
print("\nNo hops")
print_stats(single_model_times_80)

Many hops
Min: 282.67ms
Max: 320.52ms
Mean: 302.73ms
Median: 301.84ms
P90: 313.40ms
P95: 320.44ms
P99: 320.50ms

No hops
Min: 219.86ms
Max: 232.68ms
Mean: 225.04ms
Median: 224.73ms
P90: 227.84ms
P95: 232.33ms
P99: 232.61ms


In [23]:
import numpy as np

# Timing samples from the chained-model pipelines, one entry per experiment,
# ordered by hop count (1, 2, 5, 10, 20, 40, 60, 80).
many_hops = [
    many_models_times_1,
    many_models_times_2,
    many_models_times_5,
    many_models_times_10,
    many_models_times_20,
    many_models_times_40,
    many_models_times_60,
    many_models_times_80
]

# Timing samples from the single-model runs, in the same hop-count order.
no_hops = [
    single_model_times_1,
    single_model_times_2,
    single_model_times_5,
    single_model_times_10,
    single_model_times_20,
    single_model_times_40,
    single_model_times_60,
    single_model_times_80
]

In [26]:
# Persist the raw timings (nanoseconds). Each list in `many_hops` / `no_hops`
# holds the samples for one hop count, so the stacked array has one ROW per hop
# count. Transpose it so that each COLUMN is a hop count and lines up with the
# header; each row is then one repeat.
np.savetxt("many_models.csv", np.array(many_hops).T, delimiter=",", header="1,2,5,10,20,40,60,80")
np.savetxt("single_model.csv", np.array(no_hops).T, delimiter=",", header="1,2,5,10,20,40,60,80")