In [2]:

import asyncio
import aiohttp
import requests
from time import perf_counter

# Generate endpoint of the remote Ollama server.
API_URL = "http://10.215.130.20:11434/api/generate"

# Request body for /api/generate.
# Sampling parameters must be nested under "options"; top-level
# "temperature"/"max_tokens" keys are not part of the generate API and would
# be ignored. The response-length cap is called "num_predict".
PAYLOAD = {
    "model": "mistral",
    "prompt": "Write a short poem about AI and Telecoms",
    "options": {
        "temperature": 0.7,
        "num_predict": 200,
    },
    # Ask for one complete JSON object instead of a stream of chunks.
    "stream": False,
}


ModuleNotFoundError: No module named 'aiohttp'

In [2]:
async def llm_request(url, payload):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    A new ``aiohttp.ClientSession`` is opened for every call.

    Args:
        url: Endpoint to POST to.
        payload: JSON-serialisable request body.

    Returns:
        The decoded JSON response, or ``None`` if the request failed. Request
        failures are printed instead of raised so that one failing call does
        not abort an enclosing ``asyncio.gather``.
    """
    # trust_env=False prevents using HTTP(S)_PROXY in env (like --noproxy '*')
    async with aiohttp.ClientSession(trust_env=False) as session:
        try:
            async with session.post(url, json=payload, headers={"Content-Type": "application/json"}) as resp:
                resp.raise_for_status()
                # content_type=None: decode the body as JSON whatever Content-Type the server sent.
                return await resp.json(content_type=None)
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as e:
            # Only transport/HTTP errors, timeouts and undecodable bodies are
            # handled here; programming errors propagate to the caller.
            # The exception type is printed because some exceptions have no message.
            print(f"Error: {type(e).__name__}: {e}")
            return None

async def multi_requests(n, url, payload):
    """Run ``n`` identical ``llm_request`` calls concurrently.

    Returns the list of results produced by ``asyncio.gather``, one per call.
    """
    pending = [llm_request(url, payload) for _ in range(n)]
    return await asyncio.gather(*pending)

t = perf_counter()
a = await multi_requests(3, API_URL, PAYLOAD)
print(perf_counter() - t)


7.703046419000032


In [3]:
url = "http://www.google.com"
n = 20
t = perf_counter()
async def get(url, session):
        async with session.get(url=url) as res:
            await res.read()
async with aiohttp.ClientSession() as session:
    await asyncio.gather(*(get(url, session) for _ in range(n)))
print(perf_counter() - t)

0.2369936669999788


In [4]:
# Sequential baseline: the same n GETs, issued one after another with requests.
# Each call goes through the module-level requests.get, not a shared Session.
t = perf_counter()
for _ in range(n):
    requests.get(url)
elapsed = perf_counter() - t
print(elapsed)

2.1835936399999696


In [63]:
import os
import asyncio
import time

from langchain_ollama import ChatOllama

# Address of the remote Ollama server — change these two values to retarget
# both the proxy bypass and the client below.
OLLAMA_HOST = "10.215.130.20"  # OR 172.25.149.93
OLLAMA_PORT = 11434  # OR 11435

# Add the server to NO_PROXY without discarding entries that are already set.
# The membership check keeps the cell idempotent when it is re-run.
_no_proxy_hosts = [h.strip() for h in os.environ.get("NO_PROXY", "").split(",") if h.strip()]
if OLLAMA_HOST not in _no_proxy_hosts:
    _no_proxy_hosts.append(OLLAMA_HOST)
os.environ["NO_PROXY"] = ",".join(_no_proxy_hosts)

# The same prompt repeated n times, one entry per request.
n = 5
messages = ["Write python code to calculate the factorial of n"] * n

# Initialize ChatOllama with remote server
llm = ChatOllama(
    model="mistral",  # Change to your model depending on what's available on the ollama server
    base_url=f"http://{OLLAMA_HOST}:{OLLAMA_PORT}",
)



In [64]:
t = perf_counter()
async def invoke(msg):
    await llm.ainvoke(msg)
    await asyncio.sleep(1)
await asyncio.gather(*(invoke(msg) for msg in messages))
print(perf_counter() - t)

9.248692317003588


In [30]:
import pandas as pd

class SteadyUser:
    """A simulated user that issues requests at a fixed rate.

    Attributes (stored exactly as passed to the constructor):
        name: Label for the user.
        req_freq: Requests per time unit; consecutive requests are
            ``1 / req_freq`` apart.
        duration: Requests are generated for offsets from 0 up to and
            including this value.
        delay_start: Constant offset added to every generated timestamp.
    """

    def __init__(self, name: str, req_freq: float, duration: float, delay_start: float = 0.0):
        self.name = name
        self.req_freq = req_freq
        self.duration = duration
        self.delay_start = delay_start

    def get_timestamps(self) -> list[float]:
        """Return ``k / req_freq + delay_start`` for every ``k >= 0`` with ``k / req_freq <= duration``.

        Returns:
            The timestamps in increasing order; empty if ``duration`` is negative.

        Raises:
            ValueError: If ``req_freq`` is not positive (there is no valid
                spacing between requests in that case).
        """
        if self.req_freq <= 0:
            raise ValueError(f"req_freq must be positive, got {self.req_freq!r}")
        if self.duration < 0:
            return []
        # Derive each timestamp from its index instead of repeatedly adding the
        # interval: accumulated rounding error (0.1 + 0.1 + 0.1 > 0.3) can
        # otherwise push the last step past `duration` and silently drop it.
        # The small tolerance guards the product against the same rounding.
        n_slots = int(self.duration * self.req_freq + 1e-9) + 1
        return [k / self.req_freq + self.delay_start for k in range(n_slots)]


class BurstUser:
    """A simulated user that issues ``n_req`` requests, all at the instant ``time``."""

    def __init__(self, name: str, n_req: int, time: float):
        self.name, self.n_req, self.time = name, n_req, time

    def get_timestamps(self) -> list[float]:
        """Return a list holding ``time`` repeated ``n_req`` times."""
        return [self.time] * self.n_req

# Three steady users at 1 request per time unit with staggered start offsets.
user1 = SteadyUser(name='u1', req_freq=1.0, duration=10.0, delay_start=0.0)
user2 = SteadyUser(name='u2', req_freq=1.0, duration=10.0, delay_start=0.3)
user3 = SteadyUser(name='u3', req_freq=1.0, duration=10.0, delay_start=0.6)
# Two burst users. Each needs its own name, otherwise their rows cannot be
# told apart in the schedule's 'User' column.
user4 = BurstUser(name='u4', n_req=5, time=5.5)
user5 = BurstUser(name='u5', n_req=5, time=2.5)

users = [user1, user2, user3, user4, user5]

# Build one frame per user (one row per request timestamp), then stack them
# into a single schedule with a fresh 0..N-1 index.
dfs = []
for user in users:
    timestamps = user.get_timestamps()
    n_rows = len(timestamps)
    columns = {
        'Timestamp': timestamps,
        'Request tokens': [50] * n_rows,
        'Response tokens': [50] * n_rows,
        'User': [user.name] * n_rows,
    }
    dfs.append(pd.DataFrame(columns))
schedule = pd.concat(dfs, ignore_index=True)
schedule

Unnamed: 0,Timestamp,Request tokens,Response tokens,User
0,0.0,50,50,u1
1,1.0,50,50,u1
2,2.0,50,50,u1
3,3.0,50,50,u1
4,4.0,50,50,u1
5,5.0,50,50,u1
6,6.0,50,50,u1
7,7.0,50,50,u1
8,8.0,50,50,u1
9,9.0,50,50,u1
