### Test Embedding Endpoint

In [16]:
import requests

In [11]:
def make_request(text):
    url = "http://localhost/predict"
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    payload = {"text": text}
    
    response = requests.post(url, headers=headers, json=payload)
    return response.status_code, response.json()

In [13]:
text_input = "One the endpoint is up and running, this request will return the status code (200) and the data \
    (a dictionary that includes the embeddings, dimensions, model version, and inference time in milliseconds)."
status_code, data = make_request(text_input)
print("Status Code:", status_code)
print("Data:", data)

Status Code: 200
Data: {'embedding': [0.0015703454846516252, -0.04597385227680206, -0.09514722973108292, 0.012065425515174866, 0.008991300128400326, -0.002590947085991502, -0.12053705751895905, 0.011650974862277508, -0.0411858968436718, 0.010653400793671608, 0.016804123297333717, 0.00669176084920764, -0.024425165727734566, -0.03045264072716236, -0.020302068442106247, 0.003890052903443575, 0.0926298126578331, -0.04252620041370392, -0.039831772446632385, -0.026265082880854607, 0.10507100820541382, 0.04560420662164688, -0.004665344953536987, -0.0074605923146009445, -0.006964197847992182, 0.017093600705266, -0.04362376779317856, -0.0015724600525572896, 0.03198929876089096, 0.00028026424115523696, 0.041238781064748764, -0.028734257444739342, -0.04187624529004097, 0.10212308913469315, 0.11073760688304901, 0.030537189915776253, 0.039739955216646194, -0.07697316259145737, -0.007511836476624012, -0.0397360622882843, 0.01404514443129301, -0.0005825209664180875, -0.060614511370658875, 0.042733792

### Stress Test the Endpoint

In [1]:
import requests
import concurrent.futures
import time

URL = "http://localhost/predict"
DATA = {"text": "Small sentence to be embedded."}
HEADERS = {
'accept': 'application/json',
'Content-Type': 'application/json'
}

def call_inference(_):
    try:
        r = requests.post(URL, headers=HEADERS,json=DATA, timeout=5)
        return r.status_code
    except Exception as e:
        return str(e)

def stress_test(duration=60, concurrency=50):
    end_time = time.time() + duration
    with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
        while time.time() < end_time:
            futures = [executor.submit(call_inference, i) for i in range(concurrency)]
            results = [f.result() for f in futures]
            print(results)

In [3]:
stress_test(duration=60, concurrency=200)

[200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, "HTTPConnectionPool(host='localhost', port=80): Read timed out. (read timeout=5)", 200, "HTTPConnectionPool(host='localhost', port=80): Rea