# Querying different LLM endpoints in parallel with `prompto` vs. synchronous Python for loop

In [7]:
import time
import os
import requests
from dotenv import load_dotenv

from prompto.settings import Settings
from prompto.experiment import Experiment

from api_utils import send_prompts_sync
from dataset_utils import load_prompt_dicts, load_prompts, generate_experiment_2_file

# Read variables from the local .env file into the process environment;
# a later cell looks up OLLAMA_API_ENDPOINT through os.environ.
load_dotenv(dotenv_path=".env")

True

## Set up input JSONL files

In [2]:
# Load the sample prompts; they are passed as `prompts` when the combined
# experiment file is generated below.
# NOTE(review): load_prompts comes from dataset_utils and its return type is not
# visible here — presumably a list of prompt strings; confirm.
alpaca_prompts = load_prompts("./sample_prompts.json")

In [3]:
# Where the combined experiment file is written, and the directory that holds it.
COMBINED_EXPERIMENT_FILENAME = "./data/input/all_experiments.jsonl"
INPUT_EXPERIMENT_FILEDIR = "./data/input"

# makedirs also creates the parent "./data" when it is missing (os.mkdir would
# raise FileNotFoundError on a fresh checkout), and exist_ok=True keeps the cell
# safe to re-run without a separate existence check.
os.makedirs(INPUT_EXPERIMENT_FILEDIR, exist_ok=True)

Generate the three separate experiment JSON files, then load them in and merge them into a single file.

In [4]:
# Write the combined experiment file for three API/model pairs using the same prompts.
# NOTE(review): api, model_name and params look positionally aligned (entry i of
# each list describes one backend) — confirm against dataset_utils.
generate_experiment_2_file(
    path=COMBINED_EXPERIMENT_FILENAME,
    prompts=alpaca_prompts,
    api=["openai", "gemini", "ollama"],
    model_name=["gpt-3.5-turbo", "gemini-1.5-flash", "llama3"],
    # Each dict uses its own API's parameter names; all three set temperature to
    # 0.9 and use the value 100 for max_tokens / max_output_tokens / num_predict.
    params=[
        {"n": 1, "temperature": 0.9, "max_tokens": 100},
        {"candidate_count": 1, "temperature": 0.9, "max_output_tokens": 100},
        {"temperature": 0.9, "num_predict": 100, "seed": 42},
    ],
)

In [5]:
# Sanity check: report how many prompt dicts the combined experiment file holds.
n_prompt_dicts = len(load_prompt_dicts(COMBINED_EXPERIMENT_FILENAME))
print(f"len(load_prompt_dicts(COMBINED_EXPERIMENT_FILENAME)): {n_prompt_dicts}")

len(load_prompt_dicts(COMBINED_EXPERIMENT_FILENAME)): 300


In [8]:
# Send a prompt-less generate request for llama3 to the Ollama server before any
# timing starts.
# NOTE(review): presumably this warms up / loads the model so that load time is
# not charged to the first timed query — confirm against the Ollama API docs.
ollama_endpoint = os.environ.get("OLLAMA_API_ENDPOINT")
if not ollama_endpoint:
    # Fail with a clear message instead of posting to the literal URL "None/api/generate".
    raise RuntimeError(
        "OLLAMA_API_ENDPOINT is not set; define it in .env or the environment."
    )

# Drop any trailing slash so the joined URL never contains "//api/generate".
ollama_generate_url = ollama_endpoint.rstrip("/") + "/api/generate"

# requests has no default timeout, so an unreachable server would hang the cell
# forever. The limit is generous because this first request may be slow.
requests.post(
    ollama_generate_url,
    json={"model": "llama3"},
    timeout=300,
)

<Response [200]>

In [9]:
# Synchronous baseline: send every prompt dict from the combined file through
# send_prompts_sync and record the elapsed seconds in sync_time.
# perf_counter() is a monotonic, high-resolution clock, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
multiple_api_sync = send_prompts_sync(
    prompt_dicts=load_prompt_dicts(COMBINED_EXPERIMENT_FILENAME)
)
sync_time = time.perf_counter() - start

100%|██████████| 300/300 [09:18<00:00,  1.86s/it]


In [10]:
multiple_api_experiment = Experiment(
    file_name="all_experiments.jsonl",
    settings=Settings(
        data_folder="./data",
        parallel=True,
        max_queries_dict={"openai": 500, "gemini": 500, "ollama": 40},
    ),
)

start = time.time()
multiple_api_responses, _ = await multiple_api_experiment.process()
prompto_time = time.time() - start

Waiting for all groups to complete:   0%|          | 0/3 [00:00<?, ?group/s]
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
Sending 100 queries at 500 QPM with RI of 0.12s for group openai  (attempt 1/3): 100%|██████████| 100/100 [00:12<00:00,  8.05query/s]

Sending 100 queries at 500 QPM with RI of 0.12s for group gemini  (attempt 1/3): 100%|██████████| 100/100 [00:12<00:00,  8.03query/s]

[A
[A

Waiting for responses for group gemini  (attempt 1/3): 100%|██████████| 100/100 [00:01<00:00, 61.42query/s]
Waiting for responses for group openai  (attempt 1/3): 100%|████████

In [11]:
# Elapsed seconds: (synchronous send_prompts_sync run, prompto run).
sync_time, prompto_time

(558.7412779331207, 269.0622651576996)