In [2]:

import os

# Credentials are taken from the environment so that real secrets never have
# to be typed into (and accidentally shared or committed with) the notebook.
# The variable names AIDEPOT_SUBSCRIBER_ID / AIDEPOT_API_KEY are this
# notebook's own convention, not something the AIDepot package reads itself.
# When a variable is unset the original placeholder is used, so replacing the
# placeholder text in place still works as before.
subscriber_id = os.environ.get('AIDEPOT_SUBSCRIBER_ID', '<your_subscriber_email@example.com>')
api_key = os.environ.get('AIDEPOT_API_KEY', '<your_api_key>')

# Note: This notebook was run on slow development hardware, for Client demonstration purposes.
#       The latencies are not representative of production hardware.

import AIDepot

from pprint import pprint
import time

# Single client instance shared by every cell below; it carries the
# subscriber id and API key defined at the top of this cell.
client = AIDepot.Client(
    subscriber_id=subscriber_id,
    api_key=api_key,
)


# Sampling settings common to every conversation in this demo; only the prompt
# and the number of requested completions ("n") differ between entries.
_shared_sampling = {
    "temperature": 0.3,
    "top_p": 1,
    "max_tokens": 1000,
    "presence_penalty": 0,
    "frequency_penalty": 0,
}


def _conversation(prompt, completions):
    """Build one single-turn user conversation using the shared sampling settings."""
    return {
        "messages": [{"role": "user", "content": prompt}],
        **_shared_sampling,
        "n": completions,
    }


conversations = [
    _conversation("Can you tell me a short story?", 1),
    _conversation("What is the square root of 2?", 2),
]

# Job payload: an 'sla' value plus the list of conversations to run.
# NOTE(review): 'sla' presumably selects the service tier -- confirm the
# accepted values against the AIDepot documentation.
job = dict(sla='priority', conversations=conversations)

In [3]:
# Blocking submission: send the job and wait for its response, then
# pretty-print the whole (status code, message) result.

response = client.submit_job(
    AIDepot.Resources.MISTRAL_7B_MESSAGE,
    job,
)
pprint(response)


(200,
 {'batch_submitted_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 18, 271921, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
  'depot_permits_available': 1841009228,
  'first_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 20, 902030, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
  'job_id': 1086,
  'last_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 33, 962050, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
  'responses': [{'batch_index': 0,
                 'choices': [{'finish_reason': 'stop',
                              'index': 0,
                              'logprobs': None,
                              'message': {'audio': None,
                                          'content': 'Once upon a time, in a '
                                                     'small village nestled '
                                                     'between two great '
                                             

In [4]:
# From an async context, get a future response
# Note that this is a Jupyter notebook, so I can await async functions directly
# Normally this would be called within an async function like:
#
# async def send_job():
#     future_response = client.submit_job_async(AIDepot.Resources.MISTRAL_7B_MESSAGE, job)
#     response = await future_response

job = {
    'sla': 'batch',
    'conversations': [conversations[1]]
}

future_response = client.submit_job_async(AIDepot.Resources.MISTRAL_7B_MESSAGE, job)
response = await future_response
pprint(response)

(200,
 {'batch_submitted_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 34, 198094, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
  'depot_permits_available': 1841008660,
  'first_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 37, 18879, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
  'job_id': 1087,
  'last_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 37, 18879, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
  'responses': [{'batch_index': 0,
                 'choices': [{'finish_reason': 'stop',
                              'index': 0,
                              'logprobs': None,
                              'message': {'audio': None,
                                          'content': 'The square root of 2 '
                                                     '(√2) is an irrational '
                                                     "number. It's "
                                                     '

In [5]:
# Start a job without waiting for it to finish, then fetch its result twice
# with get_job_result: once immediately, and once more after a 10 second pause.

http_response_code, response_message = client.start_job(
    AIDepot.Resources.MISTRAL_7B_MESSAGE, job
)
print(http_response_code)
pprint(response_message)
job_id = response_message['job_id']

# First pass polls straight away (no pause); second pass waits before polling.
for pause_seconds in (0, 10):
    if pause_seconds:
        time.sleep(pause_seconds)
    http_response_code, response_message = client.get_job_result(
        AIDepot.Resources.LLM_JOB_RESULTS, job_id
    )
    print(http_response_code)
    pprint(response_message)


201
{'job_id': 1088, 'permits_remaining': 1841000650}
200
{'batch_submitted_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 37, 153981, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
 'depot_permits_available': 1841000650,
 'job_id': 1088,
 'status': 'pending',
 'total_depot_permits_for_this_job': None,
 'total_input_tokens': None,
 'total_output_tokens': None}
200
{'batch_submitted_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 37, 153981, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
 'depot_permits_available': 1841007972,
 'first_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 40, 74552, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
 'job_id': 1088,
 'last_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 40, 74552, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
 'responses': [{'batch_index': 0,
                'choices': [{'finish_reason': 'stop',
                             'index': 0,
                      

In [6]:
# Start a job without waiting for it to finish, do some unrelated work, and
# then connect to AIDepot and wait for the job to complete.

http_response_code, response_message = client.start_job(
    AIDepot.Resources.MISTRAL_7B_MESSAGE, job
)
print(http_response_code)
pprint(response_message)
job_id = response_message['job_id']

# Stand-in for real processing done while the job is running.
j = sum(range(1000))

http_response_code, response_message = client.connect_and_listen_for_status(job_id)
print(http_response_code)
pprint(response_message)


201
{'job_id': 1089, 'permits_remaining': 1840999962}
200
{'batch_submitted_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 47, 366946, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
 'depot_permits_available': 1841007436,
 'first_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 49, 137955, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
 'job_id': 1089,
 'last_batch_item_completion_timestamp': datetime.datetime(2024, 11, 30, 18, 34, 49, 137955, tzinfo=zoneinfo.ZoneInfo(key='America/Chicago')),
 'responses': [{'batch_index': 0,
                'choices': [{'finish_reason': 'stop',
                             'index': 0,
                             'logprobs': None,
                             'message': {'audio': None,
                                         'content': 'The square root of 2 (√2) '
                                                    'is an irrational number. '
                                                    'It is approximately 

In [7]:
# The client can also just generate the endpoint URLs, for callers who would
# rather talk to AIDepot with their own HTTP / websocket tooling. The
# websocket route is the one used to listen for a job finishing.

http_route = client.build_http_route(
    AIDepot.Resources.LLAMA3_1_NEMOTRON_70B_MESSAGE
)
print(http_route)

# job_id is carried over from the previous cell.
websocket_route = client.build_websocket_route(job_id)
print(websocket_route)



https://aidepot.net/api/v1/llm/llama_3_1_nemotron_70b/message/
wss://aidepot.net/api/ws/status/%3Cyour_subscriber_email%40example.com%3E/1089/
