In [1]:
import asyncio
import json
import time
from datetime import datetime, timezone

import aiohttp

In [2]:
# HTTP endpoint that the timing cells in this notebook POST to.
API_URL = "http://10.215.130.20:11434/api/generate"

# Request-body template for one non-streaming generation call.
# NOTE(review): temperature / max_tokens are sent as top-level keys -- confirm
# the server honours them there (some generate APIs expect sampling settings
# inside a nested "options" object under different names).
PAYLOAD = dict(
    model="mistral",
    prompt="Write a short poem about AI and Telecoms",
    temperature=0.7,
    max_tokens=200,
    stream=False,
)

In [37]:
class TimingTrace(aiohttp.TraceConfig):
    """aiohttp trace configuration that timestamps stages of a request.

    Three hooks are registered at construction time. Each one stores a
    wall-clock ``datetime`` on the trace context it is handed and prints it:

    * ``context.request_start``   - the client started the request.
    * ``context.request_sent``    - a chunk of the request body was written;
      the attribute is overwritten on every chunk, so the last write wins.
    * ``context.server_received`` - the first response-body chunk arrived.
    """

    def __init__(self):
        super().__init__()
        # Not written to by the hooks below (they store their timestamps on
        # the trace context); kept so existing attribute access keeps working.
        self.timings = {}
        self.on_request_start.append(self.on_request_start_callback)
        self.on_request_chunk_sent.append(self.on_request_chunk_sent_callback)
        self.on_response_chunk_received.append(self.on_response_start_callback)

    async def on_request_start_callback(self, session, context, params):
        """Record and print the moment the client starts the request."""
        context.request_start = datetime.now()
        print(f"Request started (client): {context.request_start}")

    async def on_request_chunk_sent_callback(self, session, context, params):
        """Record and print the moment a request-body chunk was sent."""
        context.request_sent = datetime.now()
        print(f"Request sent to server: {context.request_sent}")

    async def on_response_start_callback(self, session, context, params):
        """Record and print the arrival of the first response-body chunk.

        NOTE(review): this hook is attached to the response *body* chunk
        signal, so the timestamp marks response data arriving at the client,
        not the server receiving the request -- confirm that is the intended
        measurement.
        """
        # The signal is defined per received chunk; without this guard every
        # later chunk would overwrite the "first byte" timestamp and print
        # another line.
        if getattr(context, "server_received", None) is not None:
            return
        context.server_received = datetime.now()
        print(f"Server acknowledged (first byte): {context.server_received}")

async def send_request_with_timing(request_id, prompt):
    """POST one non-streaming generation request and time its stages.

    A fresh ``ClientSession`` carrying a ``TimingTrace`` is opened for the
    request, so the trace hooks print their own timestamps as well.

    Args:
        request_id: Caller-chosen identifier, echoed back under ``"id"``.
        prompt: Prompt text placed in the request body.

    Returns:
        dict with the keys

        * ``id`` - ``request_id``.
        * ``client_send`` - wall-clock time just before the POST was issued.
        * ``server_ack`` - wall-clock time when the response object became
          available (response headers received). With ``"stream": False``
          this can be long after the server received the request, because
          the headers may only be sent once generation has finished.
        * ``response_complete`` - wall-clock time when the JSON body had been
          read and decoded.
        * ``time_to_server`` - seconds from ``client_send`` to ``server_ack``.
        * ``total_time`` - seconds from ``client_send`` to
          ``response_complete``.
        * ``status`` - HTTP status code of the response.
    """
    trace_config = TimingTrace()

    payload = {
        "model": "mistral",
        "prompt": prompt,
        "stream": False,
    }

    async with aiohttp.ClientSession(trace_configs=[trace_config]) as session:
        # Wall-clock values are for display; durations come from
        # perf_counter so that system clock adjustments cannot distort them.
        client_send_time = datetime.now()
        send_tick = time.perf_counter()
        print("request start", client_send_time)

        async with session.post(API_URL, json=payload) as response:
            # The response object exists once the response headers have
            # arrived -- this is NOT the moment the request left the client.
            ack_tick = time.perf_counter()
            server_ack_time = datetime.now()
            print("response headers received", server_ack_time)

            data = await response.json()
            # Sample the clocks before printing the (potentially large) body
            # so console I/O is not counted as response time.
            done_tick = time.perf_counter()
            response_complete_time = datetime.now()
            print(data)
            print("response_complete", response_complete_time)

            return {
                "id": request_id,
                "client_send": client_send_time,
                "server_ack": server_ack_time,  # response headers received
                "response_complete": response_complete_time,
                "time_to_server": ack_tick - send_tick,
                "total_time": done_tick - send_tick,
                "status": response.status,
            }

async def traffic_generator(num_requests, prompt):
    """Issue ``num_requests`` timed requests for ``prompt`` via ``asyncio.gather``.

    Requests are numbered ``0 .. num_requests - 1``.

    Returns:
        list of the per-request result dicts, in request-id order.
    """
    pending = (
        send_request_with_timing(idx, prompt) for idx in range(num_requests)
    )
    return await asyncio.gather(*pending)

# Run: send a single request with the prompt "Hello", then report each
# request's `time_to_server` (stored in seconds) in milliseconds.
results = await traffic_generator(1, "Hello")
for r in results:
    print(f"Request {r['id']}: Server received in {r['time_to_server']*1000:.2f}ms")

request start 2025-12-30 18:26:05.745591
Request started (client): 2025-12-30 18:26:05.747187
Request sent to server: 2025-12-30 18:26:05.749950
request sent 2025-12-30 18:26:08.798927
Server acknowledged (first byte): 2025-12-30 18:26:08.799342
{'model': 'mistral', 'created_at': '2025-12-30T18:26:08.790429511Z', 'response': ' Hello! How can I assist you today? If you have any questions or need help with something, feel free to ask.\n\nHere are a few examples of what I can do:\n\n1. Answer factual questions: "What is the capital of France?"\n2. Help you learn a new language: "How do you say \'hello\' in Spanish?"\n3. Provide explanations on various topics: "What is quantum physics?"\n4. Solve math problems: "What is the square root of 169?"\n5. Give advice and recommendations: "Which books would you recommend for someone interested in learning about artificial intelligence?"\n6. Tell jokes or fun facts: "Why don\'t scientists trust atoms? Because they make up everything!"\n7. Help with

In [4]:
import aiohttp
import asyncio
import time
from datetime import datetime

# Filled in by the two trace hooks below; holds the values for the most
# recently traced request.
timings = {}


async def on_request_start(session, ctx, params):
    """Trace hook: record when the request starts.

    Writes ``timings['start_wall']`` (timezone-aware UTC ``datetime``) and
    ``timings['start_perf']`` (``time.perf_counter()`` reading), then prints
    the wall-clock value.
    """
    # Sample both clocks before printing so console I/O does not shift the
    # perf_counter reading used for the elapsed time. The wall clock is read
    # in UTC so that the "(UTC)" label is true on any machine.
    timings['start_wall'] = datetime.now(timezone.utc)
    timings['start_perf'] = time.perf_counter()
    print("Also Request started (UTC):", timings.get('start_wall'))


async def on_request_end(session, ctx, params):
    """Trace hook: record when the request ends and compute the elapsed time.

    Writes ``timings['end_perf']``, ``timings['end_wall']`` (timezone-aware
    UTC ``datetime``) and ``timings['elapsed_sec']`` (``end_perf`` minus
    ``start_perf``), then prints the wall-clock value.

    Raises:
        KeyError: if ``timings['start_perf']`` has not been recorded.
    """
    # perf_counter is read first, before any other work, so the elapsed time
    # is not inflated by the datetime call or by printing.
    timings['end_perf'] = time.perf_counter()
    timings['end_wall'] = datetime.now(timezone.utc)
    timings['elapsed_sec'] = timings['end_perf'] - timings['start_perf']
    print("Request ended/ack received (UTC):", timings.get('end_wall'))

# Trace configuration for this cell: route aiohttp's request-end and
# request-start signals to the two hooks defined above.
trace_config = aiohttp.TraceConfig()
trace_config.on_request_end.append(on_request_end)
trace_config.on_request_start.append(on_request_start)

async def main():
    async with aiohttp.ClientSession(trace_configs=[trace_config]) as session:
        start_at = datetime.now()
        print("Request started:", start_at)
        async with session.post(API_URL, json={"model": "llama2", "prompt": "Hello", "stream": False}) as resp:
            # This is when headers have been received
            received_at = datetime.now()
            print("Also received_at (UTC):", received_at)
            server_date_hdr = resp.headers.get("Date")

            
            print("Elapsed seconds:", timings.get('elapsed_sec'))
            print("Server Date:", server_date_hdr)

await main()

Request started: 2025-12-31 17:16:02.777273
Also Request started (UTC): 2025-12-31 17:16:02.777868
Request ended/ack received (UTC): 2025-12-31 17:16:02.817682
Also received_at (UTC): 2025-12-31 17:16:02.817757
Elapsed seconds: 0.03983436799899209
Server Date: Wed, 31 Dec 2025 17:16:02 GMT


In [11]:
async def on_request_start(session, ctx, params):
    """Trace hook: print this hook's name and the current ``perf_counter`` value."""
    # NOTE: an earlier cell defines a hook with this same name; running this
    # cell rebinds the name to the print-only version.
    stamp = time.perf_counter()
    print("on_request_start", stamp)

async def on_request_headers_sent(session, ctx, params):
    """Trace hook: print this hook's name and the current ``perf_counter`` value."""
    stamp = time.perf_counter()
    print("on_request_headers_sent", stamp)

async def on_request_chunk_sent(session, ctx, params):
    """Trace hook: print this hook's name and the current ``perf_counter`` value."""
    stamp = time.perf_counter()
    print("on_request_chunk_sent", stamp)

async def on_request_end(session, ctx, params):
    """Trace hook: print this hook's name and the current ``perf_counter`` value."""
    # NOTE: an earlier cell defines a hook with this same name; running this
    # cell rebinds the name to the print-only version.
    stamp = time.perf_counter()
    print("on_request_end", stamp)

async def on_response_chunk_received(session, ctx, params):
    """Trace hook: print this hook's name and the current ``perf_counter`` value."""
    stamp = time.perf_counter()
    print("on_response_chunk_received", stamp)



# Trace configuration for this cell: attach each print hook defined above to
# the aiohttp signal of the same name.
trace_config = aiohttp.TraceConfig()
for _signal, _hook in (
    (trace_config.on_request_start, on_request_start),
    (trace_config.on_request_headers_sent, on_request_headers_sent),
    (trace_config.on_request_chunk_sent, on_request_chunk_sent),
    (trace_config.on_request_end, on_request_end),
    (trace_config.on_response_chunk_received, on_response_chunk_received),
):
    _signal.append(_hook)
# Do not leave the loop variables behind in the notebook namespace.
del _signal, _hook


async def call(id):
    async with aiohttp.ClientSession(trace_configs=[trace_config]) as session:
        print(id, time.perf_counter())
        async with session.post(API_URL, json={"model": "mistral", "prompt": "Write me an article about AI development", "stream": False}) as resp:
            print(id, "status", time.perf_counter(), resp.status)
            print(id, "json", time.perf_counter(), await resp.json())

await asyncio.gather(*(call(i) for i in range(1)))

0 99902.578786815
on_request_start 99902.578922236
on_request_headers_sent 99902.580588508
on_request_chunk_sent 99902.580668303
on_request_end 99909.936220615
0 status 99909.936331732 200
on_response_chunk_received 99909.936359869
0 json 99909.936342206 {'model': 'mistral', 'created_at': '2025-12-31T18:07:43.868814899Z', 'response': ' Title: The Evolution and Future of Artificial Intelligence Development\n\nArtificial Intelligence (AI) is a revolutionary technology that has transformed numerous industries and aspects of our lives, from self-driving cars to personalized entertainment. This article provides an overview of the development of AI, its current state, and its future prospects.\n\n**Origins and Early Development**\n\nThe concept of AI can be traced back to the 1950s when computer scientists at Dartmouth College coined the term "Artificial Intelligence." The first AI program, Logic Theorist, was created in 1956 by Allen Newell and Herbert Simon. It could prove new mathematical

[None]