In [10]:
import requests
import time
import concurrent.futures
import base64

In [2]:
def base64_encode(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

def test_single_request(encoded_image, url):
    start_time = time.time()
    response = requests.post(url, json={"image": encoded_image})
    elapsed_time = time.time() - start_time

    if response.status_code == 200:
        return elapsed_time
    else:
        return None

In [3]:
def load_test(encoded_image, url, num_requests):
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(test_single_request, encoded_image, url) for _ in range(num_requests)]
        results = [f.result() for f in concurrent.futures.as_completed(futures)]

    results = [result for result in results if result is not None]
    return results

In [14]:
def run_load_tests(url, sample_image, num_requests):

    encoded_image = base64_encode(sample_image)
    
    print(f"Starting load test with {num_requests} concurrent requests...")
    latencies = load_test(encoded_image, url, num_requests)

    average_latency = sum(latencies) / len(latencies)
    print(f"Average latency: {average_latency:.4f} seconds")
    print(f"Min latency: {min(latencies):.4f} seconds")
    print(f"Max latency: {max(latencies):.4f} seconds")

run_load_tests("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 1)

Starting load test with 1 concurrent requests...
Average latency: 0.5274 seconds
Min latency: 0.5274 seconds
Max latency: 0.5274 seconds


### Multi requests ###

In [4]:
def test_batch_request(encoded_images, url):
    start_time = time.time()
    response = requests.post(url, json={"images": encoded_images})
    elapsed_time = time.time() - start_time

    if response.status_code == 200:
        return elapsed_time, response.json()
    else:
        print(f"Request failed with status code: {response.status_code}")
        return None

In [5]:
def concurrent_batch_test(encoded_images, url, num_requests):
   
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(test_batch_request, encoded_images, url) for _ in range(num_requests)]
        results = [f.result() for f in concurrent.futures.as_completed(futures)]
    
    results = [result for result in results if result is not None]
    return results

In [44]:
def run_load_test_batch(url, sample_image, batch_size, num_requests):

    encoded_image = base64_encode(sample_image)
    encoded_images = [encoded_image] * batch_size

    print(f"Starting batch test with {num_requests} concurrent requests...")
    test_results = concurrent_batch_test(encoded_images, url, num_requests)

    latencies = [result[0] for result in test_results]
    average_latency = sum(latencies) / len(latencies)
    print(f"Average latency: {average_latency:.4f} seconds")
    print(f"Min latency: {min(latencies):.4f} seconds")
    print(f"Max latency: {max(latencies):.4f} seconds")

# local
run_load_test_batch("http://localhost:9000/predict/", "public/sample_images/sample_cat.jpeg", 10, 50)

Starting batch test with 50 concurrent requests...
Average latency: 5.8854 seconds
Min latency: 1.3917 seconds
Max latency: 8.4781 seconds


In [6]:
def run_load_test_batch(url, sample_image, batch_size, num_requests):

    encoded_image = base64_encode(sample_image)
    encoded_images = [encoded_image] * batch_size

    print(f"Starting batch test with {num_requests} concurrent requests...")
    test_results = concurrent_batch_test(encoded_images, url, num_requests)

    latencies = [result[0] for result in test_results]
    average_latency = sum(latencies) / len(latencies)
    print(f"Average latency: {average_latency:.4f} seconds")
    print(f"Min latency: {min(latencies):.4f} seconds")
    print(f"Max latency: {max(latencies):.4f} seconds")

In [38]:
# Enclave without KMS test 1
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 10, 50)

Starting batch test with 50 concurrent requests...
Average latency: 36.7901 seconds


In [40]:
# Public test 1
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 10, 50)

Starting batch test with 50 concurrent requests...
Average latency: 12.6687 seconds


In [43]:
# Enclave with KMS test 1
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 10, 50)

Starting batch test with 50 concurrent requests...
Average latency: 16.0052 seconds


In [7]:
# Enclave without KMS test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 10, 50)

Starting batch test with 50 concurrent requests...
Average latency: 13.3696 seconds
Min latency: 5.2312 seconds
Max latency: 17.0188 seconds


In [8]:
# Public test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 10, 50)

Starting batch test with 50 concurrent requests...
Average latency: 14.2366 seconds
Min latency: 5.2022 seconds
Max latency: 16.4312 seconds


In [9]:
# Enclave with KMS test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 10, 50)

Starting batch test with 50 concurrent requests...
Average latency: 16.2098 seconds
Min latency: 6.1972 seconds
Max latency: 20.0231 seconds


### Larger batch size ###

In [52]:
# Enclave without KMS test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 20, 50)

Starting batch test with 50 concurrent requests...
Average latency: 16.4848 seconds
Min latency: 8.9849 seconds
Max latency: 25.3730 seconds


In [50]:
# Public test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 20, 50)

Starting batch test with 50 concurrent requests...
Average latency: 20.1880 seconds
Min latency: 9.0492 seconds
Max latency: 26.8044 seconds


In [51]:
# Enclave with KMS test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 20, 50)

Starting batch test with 50 concurrent requests...
Average latency: 20.9921 seconds
Min latency: 9.0141 seconds
Max latency: 26.1185 seconds


### More concurrent requests ###

In [54]:
# Enclave without KMS test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 20, 100)

Starting batch test with 100 concurrent requests...
Average latency: 17.4543 seconds
Min latency: 8.8289 seconds
Max latency: 25.4990 seconds


In [53]:
# Public test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 20, 100)

Starting batch test with 100 concurrent requests...
Average latency: 16.4251 seconds
Min latency: 9.2622 seconds
Max latency: 22.8421 seconds


In [55]:
# Enclave with KMS test 2
run_load_test_batch("http://localhost:8888/predict/", "public/sample_images/sample_cat.jpeg", 20, 100)

Starting batch test with 100 concurrent requests...
Average latency: 18.3277 seconds
Min latency: 7.4616 seconds
Max latency: 25.9433 seconds


### Test with Nginx, 100 requests, batch size 20 ###

In [7]:
# Enclave without KMS test 1
run_load_test_batch("http://18.223.112.165/predict/", "public/sample_images/sample_cat.jpeg", 20, 100)

Starting batch test with 100 concurrent requests...
Average latency: 14.7461 seconds
Min latency: 4.3754 seconds
Max latency: 24.1672 seconds


In [8]:
# Public test 1
run_load_test_batch("http://18.223.112.165/predict/", "public/sample_images/sample_cat.jpeg", 20, 100)

Starting batch test with 100 concurrent requests...
Average latency: 14.2051 seconds
Min latency: 7.7020 seconds
Max latency: 22.7423 seconds


In [9]:
# Enclave with KMS test 1
run_load_test_batch("http://18.223.112.165/predict/", "public/sample_images/sample_cat.jpeg", 20, 100)

Starting batch test with 100 concurrent requests...
Average latency: 14.5057 seconds
Min latency: 6.3706 seconds
Max latency: 20.9972 seconds


### Test with concurrent requests from different threads ###

In [21]:
def threaded_batch_test(encoded_images, url, num_threads, requests_per_thread):
   
    with concurrent.futures.ThreadPoolExecutor(max_workers = num_threads) as executor:
        futures = [executor.submit(test_batch_request, encoded_images, url) for _ in range(num_threads * requests_per_thread)]
        results = [f.result() for f in concurrent.futures.as_completed(futures)]
    
    results = [result for result in results if result is not None]
    return results

def run_load_test_batch(url, sample_image, batch_size, num_threads, requests_per_thread):

    encoded_image = base64_encode(sample_image)
    encoded_images = [encoded_image] * batch_size

    print(f"Using batch size of {batch_size} for each request")
    print(f"Starting batch test with {num_threads} threads and {requests_per_thread} requests per thread...")
    test_results = threaded_batch_test(encoded_images, url, num_threads, requests_per_thread)

    latencies = [result[0] for result in test_results]
    average_latency = sum(latencies) / len(latencies)
    print(f"Average latency: {average_latency:.4f} seconds")
    print(f"Min latency: {min(latencies):.4f} seconds")
    print(f"Max latency: {max(latencies):.4f} seconds")

In [31]:
# Public 1
run_load_test_batch("http://18.223.112.165/predict/", "public/sample_images/sample_cat.jpeg", 5,50, 2)

Using batch size of 5 for each request
Starting batch test with 50 threads and 2 requests per thread...
Average latency: 21.9317 seconds
Min latency: 9.3324 seconds
Max latency: 35.6516 seconds


In [30]:
# Enclave 1
run_load_test_batch("http://18.223.112.165/predict/", "public/sample_images/sample_cat.jpeg", 5, 50,2)

Using batch size of 5 for each request
Starting batch test with 50 threads and 2 requests per thread...
Average latency: 57.8437 seconds
Min latency: 24.0395 seconds
Max latency: 106.5628 seconds
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status code: 502
Request failed with status 

In [33]:
# Public 2
run_load_test_batch("http://18.223.112.165/predict/", "public/sample_images/sample_cat.jpeg", 2,100,2)

Using batch size of 2 for each request
Starting batch test with 100 threads and 2 requests per thread...
Average latency: 19.9695 seconds
Min latency: 7.0691 seconds
Max latency: 41.7985 seconds


In [32]:
# Enclave 2
run_load_test_batch("http://18.223.112.165/predict/", "public/sample_images/sample_cat.jpeg", 2, 100,2)

Using batch size of 2 for each request
Starting batch test with 100 threads and 2 requests per thread...
Average latency: 20.9098 seconds
Min latency: 8.1304 seconds
Max latency: 58.0721 seconds
