### Test Embedding Endpoint

In [1]:
import requests

In [2]:
def make_request(text, timeout=30):
    """Send ``text`` to the local ``/predict`` endpoint and return its reply.

    Args:
        text: The string to embed; sent as the ``"text"`` field of the
            JSON request body.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.post``; ``None`` waits indefinitely.

    Returns:
        A ``(status_code, data)`` tuple. ``data`` is the decoded JSON body,
        or the raw response text when the body is not valid JSON.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (for example a connection error or a timeout).
    """
    url = "http://localhost/predict"
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    payload = {"text": text}

    # Without a timeout the call can block forever if the server never answers.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    try:
        data = response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page). Keep the status
        # code visible to the caller instead of failing on the decode.
        data = response.text
    return response.status_code, data

In [None]:
# Sample payload for a single request against the endpoint.
# NOTE: the wording and the run of spaces before "(a dictionary" are part of
# the text that gets embedded and are kept exactly as they were.
text_input = (
    "One the endpoint is up and running, this request will return "
    "the status code (200) and the data "
    "    (a dictionary that includes the embeddings, dimensions, "
    "model version, and inference time in milliseconds)."
)
status_code, data = make_request(text_input)
print("Status Code:", status_code)
print("Data:", data)

### Stress Test the Endpoint

In [1]:
import requests
import concurrent.futures
import time

# Request template shared by every call made during the stress test.
URL = "http://localhost/predict"
HEADERS = {
    "accept": "application/json",
    "Content-Type": "application/json",
}
DATA = {"text": "Small sentence to be embedded."}

def call_inference(_):
    """Fire one POST at ``URL`` and report how it went.

    The positional argument is ignored; it only exists so the function can
    be handed an index by the caller.

    Returns:
        The HTTP status code on success, or the exception message as a
        string if the request raised (5-second timeout included).
    """
    try:
        response = requests.post(URL, json=DATA, headers=HEADERS, timeout=5)
    except Exception as exc:
        # Best-effort: a failed call is recorded as text, never re-raised,
        # so one bad request does not abort the whole run.
        return str(exc)
    else:
        return response.status_code

def stress_test(duration=60, concurrency=50):
    """Repeatedly hit the endpoint in batches until ``duration`` elapses.

    Each round submits ``concurrency`` calls to ``call_inference`` on a
    thread pool of the same size, waits for all of them, and prints the
    list of outcomes. The deadline is only checked between rounds, so the
    last round may finish after ``duration`` seconds have passed.

    Args:
        duration: How long to keep starting new rounds, in seconds.
        concurrency: Number of worker threads and of requests per round.
    """
    deadline = time.time() + duration
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=concurrency)
    with pool:
        while time.time() < deadline:
            # map() submits the whole batch up front and yields the results
            # in submission order once each one is available.
            outcomes = list(pool.map(call_inference, range(concurrency)))
            print(outcomes)

In [None]:
# Run for 60 seconds with 200 worker threads / 200 requests per round.
stress_test(concurrency=200, duration=60)