In [2]:
import requests
import time
import concurrent.futures
import base64

In [3]:
def base64_encode(image_path):
    """Return the contents of the file at ``image_path`` as Base64 text.

    The file is opened in binary mode and read in full; the Base64-encoded
    bytes are then decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

def test_single_request(encoded_image, url, timeout=30.0):
    """POST one encoded image to ``url`` and time the round trip.

    Args:
        encoded_image: Value sent under the ``"image"`` key of the JSON body.
        url: Endpoint that receives the POST request.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``
            argument. Pass ``None`` to wait without a limit.

    Returns:
        The elapsed time in seconds (float) when the response status code is
        200. ``None`` when the status code is anything else, or when the
        request raised a ``requests.RequestException`` (including a timeout).
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by a
    # system clock adjustment while the request is in flight.
    start_time = time.perf_counter()
    try:
        response = requests.post(url, json={"image": encoded_image}, timeout=timeout)
    except requests.RequestException:
        # A refused, dropped, or timed-out request is reported the same way as
        # a non-200 response instead of aborting the whole load test.
        return None
    elapsed_time = time.perf_counter() - start_time

    if response.status_code == 200:
        return elapsed_time
    return None

In [4]:
def load_test(encoded_image, url, num_requests, max_workers=None):
    """Issue ``num_requests`` calls to ``test_single_request`` from a thread pool.

    Args:
        encoded_image: Forwarded unchanged to ``test_single_request``.
        url: Forwarded unchanged to ``test_single_request``.
        num_requests: Number of requests to submit.
        max_workers: Size of the thread pool. Defaults to ``num_requests`` so
            every request can be in flight at the same time; pass a smaller
            positive integer to cap the number of threads.

    Returns:
        A list with the non-``None`` results of ``test_single_request``, in
        completion order. Empty when ``num_requests`` is zero or negative or
        when every call returned ``None``.
    """
    if num_requests <= 0:
        # Nothing to submit; also avoids asking for a zero-sized pool, which
        # ThreadPoolExecutor rejects with ValueError.
        return []

    if max_workers is None:
        # The executor's own default is min(32, cpu_count + 4), which would
        # silently queue requests instead of running them concurrently.
        max_workers = num_requests

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(test_single_request, encoded_image, url)
            for _ in range(num_requests)
        ]
        results = [f.result() for f in concurrent.futures.as_completed(futures)]

    # Drop the None entries that test_single_request uses to mark failures.
    return [result for result in results if result is not None]

In [6]:
def run_load_tests(url, sample_image, num_requests):
    """Run one load test against ``url`` and print latency statistics.

    Args:
        url: Endpoint passed to ``load_test``.
        sample_image: Path of the image file; it is encoded with
            ``base64_encode`` before being sent.
        num_requests: Number of requests passed to ``load_test``.

    Prints the average, minimum and maximum of the latencies returned by
    ``load_test``. When that list is empty, prints a notice instead of the
    statistics. Returns ``None``.
    """
    encoded_image = base64_encode(sample_image)

    print(f"Starting load test with {num_requests} concurrent requests...")
    latencies = load_test(encoded_image, url, num_requests)

    if not latencies:
        # load_test filters out failed requests, so the list can be empty;
        # sum()/len() would raise ZeroDivisionError and min()/max() ValueError.
        print("No successful requests; latency statistics are unavailable.")
        return

    average_latency = sum(latencies) / len(latencies)
    print(f"Average latency: {average_latency:.4f} seconds")
    print(f"Min latency: {min(latencies):.4f} seconds")
    print(f"Max latency: {max(latencies):.4f} seconds")

# Single-request run against the local prediction endpoint.
run_load_tests(
    url="http://localhost:8888/predict/",
    sample_image="public/sample_images/sample_cat.jpeg",
    num_requests=1,
)

Starting load test with 1 concurrent requests...
Average latency: 0.3378 seconds
Min latency: 0.3378 seconds
Max latency: 0.3378 seconds
