In [1]:
import os
import shutil


def create_model_files(k):
    """Write the Seldon ``Model`` manifest for the ``add10-<k>`` model.

    The manifest goes to ``./models/add10-<k>.yaml`` and references the shared
    ``add10`` artefact through its ``storageUri``; no per-model directory is
    created here. A manifest that already exists is left untouched.

    Args:
        k: Suffix used in both the model name and the manifest file name.
    """
    manifest_path = f"./models/add10-{k}.yaml"
    if os.path.exists(manifest_path):
        return

    manifest = (
        "apiVersion: mlops.seldon.io/v1alpha1\n"
        "kind: Model\n"
        "metadata:\n"
        f"  name: add10-{k}\n"
        "spec:\n"
        '  storageUri: "gs://seldon-models/scv2/examples/latency-tests/mlserver/add10"\n'
        "  requirements:\n"
        "  - mlserver\n"
        "  - python\n"
        "        "  # the template ends with an indentation-only last line
    )
    with open(manifest_path, "w") as handle:
        handle.write(manifest)


def remove_model_files(k):
    """Delete the on-disk artefacts belonging to the ``add10-<k>`` model.

    Removes the ``./models/add10-<k>`` directory tree and the
    ``./models/add10-<k>.yaml`` manifest. Whichever of the two is missing is
    skipped.

    Args:
        k: Suffix identifying the model whose files are removed.
    """
    model_stem = f"./models/add10-{k}"

    # Directory tree first, then the manifest that sits next to it.
    if os.path.exists(model_stem):
        shutil.rmtree(model_stem)

    manifest = model_stem + ".yaml"
    if os.path.exists(manifest):
        os.remove(manifest)


def create_pipeline_file(k):
    """Write a Seldon ``Pipeline`` manifest chaining ``k`` add10 models.

    The pipeline is named ``pipeline-add10-<k>`` and is written to
    ``./pipelines/add10-<k>.yaml``. Step ``add10-0`` has no declared inputs;
    every later step ``add10-<i>`` lists ``add10-<i-1>`` as its input and maps
    that step's ``outputs.sum`` tensor to ``INPUT``. The pipeline output is the
    last step, ``add10-<k-1>``. A manifest that already exists is left
    untouched.

    Args:
        k: Number of chained steps; must be at least 1.

    Raises:
        ValueError: If ``k`` is smaller than 1. Such a value would otherwise
            produce a manifest whose output step is ``add10--1`` (or lower),
            which is not one of the declared steps.
    """
    if k < 1:
        raise ValueError(f"A pipeline needs at least one step, got k={k}")

    pipeline_yaml_path = f"./pipelines/add10-{k}.yaml"
    if os.path.exists(pipeline_yaml_path):
        # Nothing to write, so skip rendering the manifest altogether.
        return

    lines = [
        "apiVersion: mlops.seldon.io/v1alpha1",
        "kind: Pipeline",
        "metadata:",
        f"  name: pipeline-add10-{k}",
        "spec:",
        "  steps:",
        "    - name: add10-0",
    ]
    # Each subsequent step consumes the `sum` output of its predecessor.
    for i in range(1, k):
        lines += [
            f"    - name: add10-{i}",
            "      inputs:",
            f"        - add10-{i - 1}",
            "      tensorMap:",
            f"        add10-{i - 1}.outputs.sum: INPUT",
        ]
    lines += [
        "  output:",
        "    steps:",
        f"    - add10-{k - 1}",
    ]

    with open(pipeline_yaml_path, "w") as pipeline_yaml:
        pipeline_yaml.write("\n".join(lines) + "\n")


def remove_pipeline_file(k):
    """Delete ``./pipelines/add10-<k>.yaml`` when it is present.

    Args:
        k: Suffix identifying the pipeline manifest to remove.
    """
    target = f"./pipelines/add10-{k}.yaml"
    if not os.path.exists(target):
        return
    os.remove(target)

In [7]:
import time
import json

def many_models_experiment(n_hops=1, n_repeats=1):
    for i in range(n_hops):
        create_model_files(i)

    create_pipeline_file(n_hops)

    for i in range(n_hops):
        !seldon model load -f ./models/add10-{i}.yaml

    !seldon pipeline load -f ./pipelines/add10-{n_hops}.yaml

    time.sleep(1)

    input_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]
    request_string = '{"model_name":"add10", "inputs":[{"name":"INPUT","contents":{"int_contents":' + str(input_list) + '},"datatype":"INT32","shape":[1,' + str(len(input_list)) + ']}]}'
    expected_output_list = [x + 10 * n_hops for x in input_list]

    times = []
    try:
        for i in range(n_repeats):
            print(f"Starting call {i}...", end='')
            start = time.time()
            output = !seldon pipeline infer 'pipeline-add10-{n_hops}' --inference-mode grpc '{request_string}'
            end = time.time()
            print(" Done!")

            # validate output
            print(end-start)
            # print(output)
            response_json = json.loads(output[0])
            output_list = response_json["outputs"][0]["contents"]["fp64Contents"]
            if not expected_output_list == output_list:
                raise ValueError(f"Expected {expected_output_list} but got {output}")

            times.append(end-start)

        return times
    except Exception as e:
        print(e)
        raise
    finally:
        # pass
        for i in range(n_hops):
            !seldon model unload add10-{i}

        !seldon pipeline unload pipeline-add10-{n_hops}


        for i in range (n_hops):
            remove_model_files(i)
        remove_pipeline_file(n_hops)    


def single_model_experiment(n_hops=1, n_repeats=1):
    !seldon model load -f ./models/add10.yaml

    if n_hops not in [1, 2, 5, 10, 20, 40, 60, 80]:
        raise ValueError(f"No defined test caller for {n_hops} hops")

    !seldon model load -f ./models/test-caller{n_hops}.yaml

    !seldon pipeline load -f ./pipelines/latency-test.yaml

    time.sleep(1)

    input_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]
    request_string = '{"model_name":"test_caller_1", "inputs":[{"name":"INPUT0","contents":{"int_contents":' + str(input_list) + '},"datatype":"INT32","shape":[1,' + str(len(input_list)) + ']}]}'
    expected_output_list = [x + 10 * n_hops for x in input_list]

    times = []
    try:
        for i in range(n_repeats):
            print(f"Starting call {i}...", end='')
            start = time.time()
            output = !seldon pipeline infer latency-test --inference-mode grpc '{request_string}'
            end = time.time()
            print(" Done!")

            # validate output
            # print(output)
            response_json = json.loads(output[0])
            output_list = response_json["outputs"][0]["contents"]["fp64Contents"]
            if not expected_output_list == output_list:
                raise ValueError(f"Expected {expected_output_list} but got {output}")

            times.append(end-start)
    
        return times
    except Exception as e:
        print(e)
        raise
    finally:
        !seldon pipeline unload latency-test

        !seldon model unload test-caller

        !seldon model unload add10

In [8]:
import numpy as np

def print_stats(times_list):
    """Print summary statistics for a series of timings, in milliseconds.

    One line each is printed for the minimum, maximum, mean, median, 90th and
    99th percentile, formatted with one decimal place.

    Args:
        times_list: Sequence of durations; every value is multiplied by 1000
            and labelled ``ms``, so the input is taken to be in seconds.
    """
    samples = np.array(times_list)

    # Each statistic is evaluated right before its line is printed.
    summaries = (
        ("Min", lambda: min(samples)),
        ("Max", lambda: max(samples)),
        ("Mean", lambda: np.mean(samples)),
        ("Median", lambda: np.median(samples)),
        ("P90", lambda: np.percentile(samples, 90)),
        ("P99", lambda: np.percentile(samples, 99)),
    )
    for label, compute in summaries:
        print(f"{label}: {compute()*1000:.1f}ms")


In [12]:
# 1 hop, 20 timed calls per variant.
many_models_times_1 = many_models_experiment(n_hops=1, n_repeats=20)
single_model_times_1 = single_model_experiment(n_hops=1, n_repeats=20)

{}
Starting call 0... Done!
0.04725146293640137
Starting call 1... Done!
0.045262813568115234
Starting call 2... Done!
0.04134225845336914
Starting call 3... Done!
0.03726935386657715
Starting call 4... Done!
0.040702104568481445
Starting call 5... Done!
0.04496002197265625
Starting call 6... Done!
0.03897428512573242
Starting call 7... Done!
0.042041778564453125
Starting call 8... Done!
0.03793764114379883
Starting call 9... Done!
0.03796505928039551
Starting call 10... Done!
0.0373075008392334
Starting call 11... Done!
0.040802955627441406
Starting call 12... Done!
0.03971052169799805
Starting call 13... Done!
0.039040565490722656
Starting call 14... Done!
0.03886294364929199
Starting call 15... Done!
0.03808188438415527
Starting call 16... Done!
0.042638540267944336
Starting call 17... Done!
0.04277682304382324
Starting call 18... Done!
0.038761138916015625
Starting call 19... Done!
0.039861202239990234
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!

In [13]:
# Summarise the 1-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_1)
print("\nNo hops")
print_stats(single_model_times_1)

Many hops
Min: 37.3ms
Max: 47.3ms
Mean: 40.6ms
Median: 39.8ms
P90: 45.0ms
P99: 46.9ms

No hops
Min: 43.4ms
Max: 53.1ms
Mean: 46.5ms
Median: 46.4ms
P90: 48.0ms
P99: 52.2ms


In [14]:
# 2 hops, 20 timed calls per variant.
many_models_times_2 = many_models_experiment(n_hops=2, n_repeats=20)
single_model_times_2 = single_model_experiment(n_hops=2, n_repeats=20)

{}
{}
Starting call 0... Done!
1.1652355194091797
Starting call 1... Done!
0.05282926559448242
Starting call 2... Done!
0.048305511474609375
Starting call 3... Done!
0.05251455307006836
Starting call 4... Done!
0.04918694496154785
Starting call 5... Done!
0.04968595504760742
Starting call 6... Done!
0.048525094985961914
Starting call 7... Done!
0.05069303512573242
Starting call 8... Done!
0.05263495445251465
Starting call 9... Done!
0.05099368095397949
Starting call 10... Done!
0.04880690574645996
Starting call 11... Done!
0.045352935791015625
Starting call 12... Done!
0.04604458808898926
Starting call 13... Done!
0.04833841323852539
Starting call 14... Done!
0.04806327819824219
Starting call 15... Done!
0.046973466873168945
Starting call 16... Done!
0.04687380790710449
Starting call 17... Done!
0.04880023002624512
Starting call 18... Done!
0.05048990249633789
Starting call 19... Done!
0.05148887634277344
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done!


In [15]:
# Summarise the 2-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_2)
print("\nNo hops")
print_stats(single_model_times_2)

Many hops
Min: 45.4ms
Max: 1165.2ms
Mean: 105.1ms
Median: 49.0ms
P90: 52.7ms
P99: 953.9ms

No hops
Min: 49.7ms
Max: 104.0ms
Mean: 56.7ms
Median: 53.9ms
P90: 60.1ms
P99: 95.7ms


In [16]:
# 5 hops, 20 timed calls per variant.
many_models_times_5 = many_models_experiment(n_hops=5, n_repeats=20)
single_model_times_5 = single_model_experiment(n_hops=5, n_repeats=20)

{}
{}
{}
{}
{}
Starting call 0... Done!
1.1381912231445312
Starting call 1... Done!
0.07648038864135742
Starting call 2... Done!
0.08040237426757812
Starting call 3... Done!
0.07757306098937988
Starting call 4... Done!
0.07085227966308594
Starting call 5... Done!
0.07102775573730469
Starting call 6... Done!
0.07282471656799316
Starting call 7... Done!
0.0715034008026123
Starting call 8... Done!
0.07218003273010254
Starting call 9... Done!
0.06885027885437012
Starting call 10... Done!
0.07143163681030273
Starting call 11... Done!
0.07063865661621094
Starting call 12... Done!
0.07094264030456543
Starting call 13... Done!
0.0716557502746582
Starting call 14... Done!
0.0706777572631836
Starting call 15... Done!
0.07269406318664551
Starting call 16... Done!
0.06865453720092773
Starting call 17... Done!
0.06626653671264648
Starting call 18... Done!
0.07443022727966309
Starting call 19... Done!
0.07203078269958496
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting call 2... Done

In [17]:
# Summarise the 5-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_5)
print("\nNo hops")
print_stats(single_model_times_5)

Many hops
Min: 66.3ms
Max: 1138.2ms
Mean: 125.5ms
Median: 71.6ms
P90: 77.9ms
P99: 937.2ms

No hops
Min: 66.1ms
Max: 98.9ms
Mean: 73.6ms
Median: 71.9ms
P90: 79.6ms
P99: 95.3ms


In [18]:
# 10 hops, 20 timed calls per variant.
many_models_times_10 = many_models_experiment(n_hops=10, n_repeats=20)
single_model_times_10 = single_model_experiment(n_hops=10, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting call 0... Done!
0.1481609344482422
Starting call 1... Done!
0.1105501651763916
Starting call 2... Done!
0.11346650123596191
Starting call 3... Done!
0.1112520694732666
Starting call 4... Done!
0.11743545532226562
Starting call 5... Done!
0.11427497863769531
Starting call 6... Done!
0.1040046215057373
Starting call 7... Done!
0.11158990859985352
Starting call 8... Done!
0.10776591300964355
Starting call 9... Done!
0.10921287536621094
Starting call 10... Done!
0.11022520065307617
Starting call 11... Done!
0.10467696189880371
Starting call 12... Done!
0.1113290786743164
Starting call 13... Done!
0.10966992378234863
Starting call 14... Done!
0.10718703269958496
Starting call 15... Done!
0.10712027549743652
Starting call 16... Done!
0.11545181274414062
Starting call 17... Done!
0.11968803405761719
Starting call 18... Done!
0.10873270034790039
Starting call 19... Done!
0.10706496238708496
{}
{}
Starting call 0... Done!
Starting call 1... Done!
Starting 

In [19]:
# Summarise the 10-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_10)
print("\nNo hops")
print_stats(single_model_times_10)

Many hops
Min: 104.0ms
Max: 148.2ms
Mean: 112.4ms
Median: 110.4ms
P90: 117.7ms
P99: 142.8ms

No hops
Min: 96.9ms
Max: 111.1ms
Mean: 104.3ms
Median: 104.2ms
P90: 110.2ms
P99: 111.0ms


In [20]:
# 20 hops, 20 timed calls per variant.
many_models_times_20 = many_models_experiment(n_hops=20, n_repeats=20)
single_model_times_20 = single_model_experiment(n_hops=20, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting call 0... Done!
1.2508807182312012
Starting call 1... Done!
0.19411897659301758
Starting call 2... Done!
0.19829988479614258
Starting call 3... Done!
0.20495080947875977
Starting call 4... Done!
0.20829415321350098
Starting call 5... Done!
0.19698619842529297
Starting call 6... Done!
0.19079875946044922
Starting call 7... Done!
0.1869349479675293
Starting call 8... Done!
0.1879889965057373
Starting call 9... Done!
0.19184136390686035
Starting call 10... Done!
0.19646930694580078
Starting call 11... Done!
0.18277859687805176
Starting call 12... Done!
0.19838356971740723
Starting call 13... Done!
0.19995474815368652
Starting call 14... Done!
0.19782733917236328
Starting call 15... Done!
0.21710848808288574
Starting call 16... Done!
0.19152617454528809
Starting call 17... Done!
0.2049105167388916
Starting call 18... Done!
0.19354009628295898
Starting call 19... Done!
0.20560646057128906
{}
{}
Starting call 0... Done!
Sta

In [21]:
# Summarise the 20-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_20)
print("\nNo hops")
print_stats(single_model_times_20)

Many hops
Min: 182.8ms
Max: 1250.9ms
Mean: 250.0ms
Median: 197.4ms
P90: 209.2ms
P99: 1054.5ms

No hops
Min: 150.1ms
Max: 182.6ms
Mean: 162.8ms
Median: 162.1ms
P90: 172.9ms
P99: 180.8ms


In [22]:
# 40 hops, 20 timed calls per variant.
many_models_times_40 = many_models_experiment(n_hops=40, n_repeats=20)
single_model_times_40 = single_model_experiment(n_hops=40, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting call 0... Done!
1.4814021587371826
Starting call 1... Done!
0.40215349197387695
Starting call 2... Done!
0.4022641181945801
Starting call 3... Done!
0.38321638107299805
Starting call 4... Done!
0.37773823738098145
Starting call 5... Done!
0.39904165267944336
Starting call 6... Done!
0.3722836971282959
Starting call 7... Done!
0.3920323848724365
Starting call 8... Done!
0.3535637855529785
Starting call 9... Done!
0.35101914405822754
Starting call 10... Done!
0.34816551208496094
Starting call 11... Done!
0.3454713821411133
Starting call 12... Done!
0.3663604259490967
Starting call 13... Done!
0.41492772102355957
Starting call 14... Done!
0.35721802711486816
Starting call 15... Done!
0.35855627059936523
Starting call 16... Done!
0.3496551513671875
Starting call 17... Done!
0.3625025749206543
Starting call 18... Done!
0.3565855026245117
Starting call 19... Done!


In [23]:
# Summarise the 40-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_40)
print("\nNo hops")
print_stats(single_model_times_40)

Many hops
Min: 345.5ms
Max: 1481.4ms
Mean: 427.2ms
Median: 368.5ms
P90: 403.5ms
P99: 1278.8ms

No hops
Min: 268.5ms
Max: 300.6ms
Mean: 284.1ms
Median: 284.3ms
P90: 293.8ms
P99: 300.4ms


In [24]:
# 60 hops, 20 timed calls per variant.
many_models_times_60 = many_models_experiment(n_hops=60, n_repeats=20)
single_model_times_60 = single_model_experiment(n_hops=60, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting call 0... Done!
1.513014793395996
Starting call 1... Done!
0.5404806137084961
Starting call 2... Done!
0.5471129417419434
Starting call 3... Done!
0.5328717231750488
Starting call 4... Done!
0.555715799331665
Starting call 5... Done!
0.5287187099456787
Starting call 6... Done!
0.5320584774017334
Starting call 7... Done!
0.5481021404266357
Starting call 8... Done!
0.5324785709381104
Starting call 9... Done!
0.5394852161407471
Starting call 10... Done!
0.5476527214050293
Starting call 11... Done!
0.5577707290649414
Starting call 12... Done!
0.555626630783081
Starting call 13... Done!
0.5130372047424316
Starting call 14... Done!
0.50958251953125
Starting call 15... Done!
0.5195631980895996
Starting call 16... Done!
0.5633339881896973
Starting call 17... Done!
0.49285221099853516
Starting call 18... Done

In [25]:
# Summarise the 60-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_60)
print("\nNo hops")
print_stats(single_model_times_60)

Many hops
Min: 492.9ms
Max: 1513.0ms
Mean: 582.6ms
Median: 536.2ms
P90: 558.3ms
P99: 1332.6ms

No hops
Min: 380.3ms
Max: 448.6ms
Mean: 412.9ms
Median: 409.6ms
P90: 437.2ms
P99: 447.0ms


In [26]:
# 80 hops, 20 timed calls per variant.
many_models_times_80 = many_models_experiment(n_hops=80, n_repeats=20)
single_model_times_80 = single_model_experiment(n_hops=80, n_repeats=20)

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
Starting call 0... Done!
1.2921667098999023
Starting call 1... Done!
0.6831233501434326
Starting call 2... Done!
0.7000508308410645
Starting call 3... Done!
0.6951956748962402
Starting call 4... Done!
0.6828844547271729
Starting call 5... Done!
0.666358232498169
Starting call 6... Done!
0.6785719394683838
Starting call 7... Done!
0.6637089252471924
Starting call 8... Done!
0.6527459621429443
Starting call 9... Done!
0.6814892292022705
Starting call 10... Done!
0.6472206115722656
Starting call 11... Done!
0.645960807800293
Starting call 12... Done!
0.6591973304748535
Starting call 13... Done!
0.6895053386688232
Starting call 14... Done!
0.6499497890472412
Starting call 15... Done!
0.6673460006713867
Starting call 16... Done!
0.6745090484619141
Startin

In [27]:
# Summarise the 80-hop timings of both variants.
print("Many hops")
print_stats(many_models_times_80)
print("\nNo hops")
print_stats(single_model_times_80)

Many hops
Min: 644.3ms
Max: 1292.2ms
Mean: 701.2ms
Median: 673.8ms
P90: 695.7ms
P99: 1179.7ms

No hops
Min: 508.5ms
Max: 571.6ms
Mean: 531.8ms
Median: 532.8ms
P90: 548.3ms
P99: 569.6ms
