# Benchmark your Forefront resources
The following script measures the response speed and requests-per-minute throughput of a resource for your chosen model and request size (input + output token length).

In [None]:
import requests
import time

# --- Benchmark settings: fill these in before running ---

# Completions API URL of a model hosted on your resource
URL: str = ""
# Your API key (Settings -> API Keys)
API_KEY: str = ""
# Average prompt length, in tokens
TOKENS_IN: int = 300
# Average completion length in tokens, or your max_tokens parameter
TOKENS_OUT: int = 30
# How many trials to average over
N_TRIALS: int = 3
# Batch size of your resource. Email support@forefront.ai with your model and
# resource type to get batch size information
BATCH_SIZE: int = 6

# HTTP headers sent with every benchmark request
headers = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json',
}


def make_repetitive_string(n: int) -> str:
    """Return the word "hello" repeated ``n`` times, separated by single spaces.

    The result has no leading or trailing space. Any ``n`` below 1 still
    yields a single "hello", so the returned string is never empty.

    Args:
        n: Number of repetitions wanted.

    Returns:
        The space-joined string of "hello" words.
    """
    # str.join builds the string in one pass instead of growing it with +=
    # on every loop iteration; max() keeps the "at least one word" behaviour.
    return " ".join(["hello"] * max(n, 1))


def make_request() -> float:
    """Send one batched completion request and return how long it took.

    Posts a prompt built from ``TOKENS_IN`` repeated words to ``URL``, asking
    for up to ``TOKENS_OUT`` tokens and ``BATCH_SIZE`` completions.

    Returns:
        Elapsed time of the HTTP call, in seconds.

    Raises:
        RuntimeError: If the response status code is not 200 (the status
            code is printed first).
    """
    body = {
        'prompt': make_repetitive_string(TOKENS_IN),
        'max_tokens': TOKENS_OUT,
        'repetition_penalty': 0.5,
        'temperature': 0.01,
        # The n parameter is identical to sending n requests in parallel so it's a simple method to benchmark a single GPU
        'n': BATCH_SIZE
    }

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    # NOTE(review): no timeout is passed, so this call can block indefinitely
    # if the server never responds — consider adding one.
    res = requests.post(URL, json=body, headers=headers)
    # Stop the clock right after the call so the status check and print
    # below are not counted in the measurement.
    elapsed = time.perf_counter() - start

    if res.status_code != 200:
        print(res.status_code)
        raise RuntimeError('Bad status code.')

    return elapsed


if __name__ == '__main__':
    # Run the trials one after another and keep each measured duration.
    durations = [make_request() for _ in range(N_TRIALS)]

    # Mean time for one batched request across all trials.
    batch_time = sum(durations, 0.0) / N_TRIALS
    # Every request asks for BATCH_SIZE completions, so scale the number of
    # batches that fit in a minute by the batch size.
    rpm = 60 / batch_time * BATCH_SIZE

    print(f'Requests per minute: {round(rpm)}')
    print(f'Response speed: {round(batch_time, 2)} seconds')