In [2]:
import time
import asyncio
import aiohttp
from datetime import datetime
import json

In [3]:
API_URL = "http://10.215.130.20:11434/api/generate"
PAYLOAD = {
    "model": "mistral",
    "prompt": "Write a short poem about AI and Telecoms",
    "temperature": 0.7,
    "max_tokens": 200,
    "stream": False 
}

In [72]:
class TimingTrace(aiohttp.TraceConfig):
    def __init__(self):
        super().__init__()
        self.timings = {}
        self.on_request_start.append(self.on_request_start_callback)
        self.on_request_chunk_sent.append(self.on_request_chunk_sent_callback)
        self.on_response_chunk_received.append(self.on_response_start_callback)
    
    async def on_request_start_callback(self, session, context, params):
        context.request_start = datetime.now()
        print(f"Request started (client): {context.request_start}")
    
    async def on_request_chunk_sent_callback(self, session, context, params):
        context.request_sent = datetime.now()
        print(f"Request sent to server: {context.request_sent}")
    
    async def on_response_start_callback(self, session, context, params):
        context.server_received = datetime.now()
        print(f"Server acknowledged (first byte): {context.server_received}")

async def send_request_with_timing(request_id, prompt):
    trace_config = TimingTrace()

    url = API_URL
    payload = {
        "model": "mistral",
        "prompt": prompt,
        "stream": False
    }

    async with aiohttp.ClientSession(trace_configs=[trace_config]) as session:
        
        client_send_time = datetime.now()

        print("request start", datetime.now())
        
        async with session.post(url, json=payload) as response:
            # Server has received and started processing
            server_ack_time = datetime.now()
            print("request sent", server_ack_time)
            
            data = await response.json()
            print(data)
            response_complete_time = datetime.now()
            print("response_complete", response_complete_time)
            
            return {
                "id": request_id,
                "client_send": client_send_time,
                "server_ack": server_ack_time,  # This is when server received it
                "response_complete": response_complete_time,
                "time_to_server": (server_ack_time - client_send_time).total_seconds()
            }

async def traffic_generator(num_requests, prompt):
    tasks = [send_request_with_timing(i, prompt) for i in range(num_requests)]
    results = await asyncio.gather(*tasks)
    return results

# Run
results = await traffic_generator(1, "Hello")
for r in results:
    print(f"Request {r['id']}: Server received in {r['time_to_server']*1000:.2f}ms")

request start 2026-01-01 21:18:24.100286
request sent 2026-01-01 21:18:27.654367
{'model': 'mistral', 'created_at': '2026-01-01T21:18:27.615698289Z', 'response': ' Hello! How can I assist you today? If you have any questions or need help with something, feel free to ask.\n\nHere are a few things I can help you with:\n\n1. Answering factual questions (e.g., "What is the capital of France?")\n2. Explaining concepts and providing information on various topics (e.g., science, history, literature, etc.)\n3. Helping with math problems (e.g., solving equations, finding factors, etc.)\n4. Generating creative content (e.g., writing short stories, composing poetry, etc.)\n5. Offering advice and suggestions based on the information you provide\n6. Assisting with language learning by providing translations, explaining grammar rules, helping with vocabulary, etc.\n7. Providing fun and interesting facts, trivia, jokes, and more!\n\nIf there\'s something else you need help with or if you have a speci

In [4]:
import aiohttp
import asyncio
import time
from datetime import datetime

timings = {}

async def on_request_start(session, ctx, params):
    timings['start_wall'] = datetime.now()
    print("Also Request started (UTC):", timings.get('start_wall'))
    timings['start_perf'] = time.perf_counter()

async def on_request_end(session, ctx, params):
    timings['end_wall'] = datetime.now()
    print("Request ended/ack received (UTC):", timings.get('end_wall'))
    timings['end_perf'] = time.perf_counter()
    timings['elapsed_sec'] = timings['end_perf'] - timings['start_perf']

trace_config = aiohttp.TraceConfig()
trace_config.on_request_start.append(on_request_start)
trace_config.on_request_end.append(on_request_end)

async def main():
    async with aiohttp.ClientSession(trace_configs=[trace_config]) as session:
        start_at = datetime.now()
        print("Request started:", start_at)
        async with session.post(API_URL, json={"model": "llama2", "prompt": "Hello", "stream": False}) as resp:
            # This is when headers have been received
            received_at = datetime.now()
            print("Also received_at (UTC):", received_at)
            server_date_hdr = resp.headers.get("Date")

            
            print("Elapsed seconds:", timings.get('elapsed_sec'))
            print("Server Date:", server_date_hdr)

await main()

Request started: 2025-12-31 17:16:02.777273
Also Request started (UTC): 2025-12-31 17:16:02.777868
Request ended/ack received (UTC): 2025-12-31 17:16:02.817682
Also received_at (UTC): 2025-12-31 17:16:02.817757
Elapsed seconds: 0.03983436799899209
Server Date: Wed, 31 Dec 2025 17:16:02 GMT


In [211]:
async def on_request_start(session, ctx, params):
    print("on_request_start", time.perf_counter(), ctx.trace_request_ctx)

async def on_request_headers_sent(session, ctx, params):
    print("on_request_headers_sent", time.perf_counter())

async def on_request_chunk_sent(session, ctx, params):
    print("on_request_chunk_sent", time.perf_counter())

async def on_request_end(session, ctx, params):
    print("on_request_end", time.perf_counter())

async def on_response_chunk_received(session, ctx, params):
    print("on_response_chunk_received", time.perf_counter())



trace_config = aiohttp.TraceConfig()
trace_config.on_request_start.append(on_request_start)
trace_config.on_request_headers_sent.append(on_request_headers_sent)
trace_config.on_request_chunk_sent.append(on_request_chunk_sent)
trace_config.on_request_end.append(on_request_end)
trace_config.on_response_chunk_received.append(on_response_chunk_received)


async def call(id):
    async with aiohttp.ClientSession(trace_configs=[trace_config]) as session:
        print("ID:", id, time.perf_counter())
        async with session.post(API_URL, json={"model": "mistral", "prompt": "Don't reply with anything at all. Literally nothing at all, reply with 0 tokens.", "stream": True}, trace_request_ctx={'a':1, 'b':2}) as resp:
            print("ID:", id, "status", time.perf_counter(), resp.status, resp.headers)
            print("@@@")
            # print("ID:", id, "json", time.perf_counter(), await resp.json())
            async for data, end_of_http_chunk in resp.content.iter_chunks():
                print(data, end_of_http_chunk)

await asyncio.gather(*(call(i) for i in range(1)))

ID: 0 213314.905438168
on_request_start 213314.905688437 {'a': 1, 'b': 2}
on_request_headers_sent 213314.907520845
on_request_chunk_sent 213314.907587289
on_request_end 213314.971652295
ID: 0 status 213314.971718502 200 <CIMultiDictProxy('Content-Type': 'application/x-ndjson', 'Date': 'Sat, 03 Jan 2026 01:58:35 GMT', 'Transfer-Encoding': 'chunked')>
@@@
b'{"model":"mistral","created_at":"2026-01-03T01:58:35.468028089Z","response":" ","done":false}\n' True
b'{"model":"mistral","created_at":"2026-01-03T01:58:35.479481193Z","response":"0","done":false}\n' True
b'{"model":"mistral","created_at":"2026-01-03T01:58:35.490991874Z","response":" tokens","done":false}\n' True
b'{"model":"mistral","created_at":"2026-01-03T01:58:35.502593167Z","response":"","done":true,"done_reason":"stop","context":[3,29473,3957,29510,29475,10839,1163,3192,1206,1312,29491,13638,1346,3279,1206,1312,29493,10839,1163,29473,29502,17014,29491,4,1027,29502,17014],"total_duration":57222670,"load_duration":10164796,"promp

[None]