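# `prompto` vs. a synchronous Python `for` loop

This notebook sends the same 100 prompts to OpenAI, Gemini and Ollama twice, once with a synchronous loop (`send_prompts_sync`) and once with `prompto`'s `Experiment.process()`, and compares how long each approach takes.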

In [1]:
import time
import os
import requests
from dotenv import load_dotenv

from prompto.settings import Settings
from prompto.experiment import Experiment

from api_utils import send_prompts_sync
from dataset_utils import load_prompt_dicts, load_prompts, generate_experiment_1_file

# Load environment variables from the local .env file before any API call is made.
# NOTE(review): presumably this is where the API keys and OLLAMA_API_ENDPOINT (read
# via os.environ in the Ollama section) come from — confirm against the project's
# .env template.
load_dotenv(dotenv_path=".env")

True

In [2]:
# Prompts shared by all three experiments: the same object is passed as `prompts=`
# to every generate_experiment_1_file call below.
# NOTE(review): the variable name suggests these were sampled from the Alpaca
# dataset — confirm.
alpaca_prompts = load_prompts("./sample_prompts.json")

In [3]:
# One experiment file per API. The Experiment cells further down are given
# data_folder="./data" plus only the bare file name, so these files presumably have
# to live in the "input" subfolder of that data folder — confirm against prompto's
# expected folder layout.
OPENAI_EXPERIMENT_FILE = "./data/input/openai.jsonl"
GEMINI_EXPERIMENT_FILE = "./data/input/gemini.jsonl"
OLLAMA_EXPERIMENT_FILE = "./data/input/ollama.jsonl"

In [4]:
# Write the OpenAI experiment file: the shared prompts, targeted at gpt-3.5-turbo.
# NOTE(review): max_tokens=100 looks like the counterpart of max_output_tokens /
# num_predict used for Gemini and Ollama, keeping response lengths comparable
# across the three APIs — confirm.
generate_experiment_1_file(
    path=OPENAI_EXPERIMENT_FILE,
    prompts=alpaca_prompts,
    api="openai",
    model_name="gpt-3.5-turbo",
    params={"n": 1, "temperature": 0.9, "max_tokens": 100},
)

In [5]:
# Write the Gemini experiment file: the shared prompts, targeted at gemini-1.5-flash.
# NOTE(review): candidate_count=1 and max_output_tokens=100 look like the Gemini
# equivalents of the OpenAI cell's n=1 and max_tokens=100 — confirm.
generate_experiment_1_file(
    path=GEMINI_EXPERIMENT_FILE,
    prompts=alpaca_prompts,
    api="gemini",
    model_name="gemini-1.5-flash",
    params={"candidate_count": 1, "temperature": 0.9, "max_output_tokens": 100},
)

In [6]:
# Write the Ollama experiment file: the shared prompts, targeted at llama3.
# Unlike the OpenAI and Gemini cells, this one also pins a seed (42).
# NOTE(review): num_predict=100 looks like the Ollama equivalent of the 100-token
# output cap used for the other two APIs — confirm.
generate_experiment_1_file(
    path=OLLAMA_EXPERIMENT_FILE,
    prompts=alpaca_prompts,
    api="ollama",
    model_name="llama3",
    params={"temperature": 0.9, "num_predict": 100, "seed": 42},
)

In [7]:
# Elapsed seconds per approach, keyed by API name ("openai", "gemini", "ollama").
# Both dicts are filled in by the timing cells in the sections below.
sync_times = {}
prompto_times = {}

In [8]:
# Sanity check: report how many prompt dicts each experiment file contains, so it is
# visible that all three APIs are benchmarked on the same number of prompts.
openai_prompt_count = len(load_prompt_dicts(OPENAI_EXPERIMENT_FILE))
print(f"len(load_prompt_dicts(OPENAI_EXPERIMENT_FILE)): {openai_prompt_count}")

gemini_prompt_count = len(load_prompt_dicts(GEMINI_EXPERIMENT_FILE))
print(f"len(load_prompt_dicts(GEMINI_EXPERIMENT_FILE)): {gemini_prompt_count}")

ollama_prompt_count = len(load_prompt_dicts(OLLAMA_EXPERIMENT_FILE))
print(f"len(load_prompt_dicts(OLLAMA_EXPERIMENT_FILE)): {ollama_prompt_count}")

len(load_prompt_dicts(OPENAI_EXPERIMENT_FILE)): 100
len(load_prompt_dicts(GEMINI_EXPERIMENT_FILE)): 100
len(load_prompt_dicts(OLLAMA_EXPERIMENT_FILE)): 100


## OpenAI

In [9]:
# Baseline: send the OpenAI prompts with the synchronous helper.
# The prompts are loaded before the clock starts so that only the API calls are
# timed, mirroring the prompto cell, which builds its Experiment before timing.
openai_prompt_dicts = load_prompt_dicts(OPENAI_EXPERIMENT_FILE)

# perf_counter() is monotonic and high-resolution, so the measured interval cannot
# be skewed by system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
openai_sync = send_prompts_sync(prompt_dicts=openai_prompt_dicts)
sync_times["openai"] = time.perf_counter() - start

100%|██████████| 100/100 [02:06<00:00,  1.26s/it]


In [10]:
openai_experiment = Experiment(
    file_name="openai.jsonl", settings=Settings(data_folder="./data", max_queries=500)
)

start = time.time()
openai_responses, _ = await openai_experiment.process()
prompto_times["openai"] = time.time() - start

Sending 100 queries at 500 QPM with RI of 0.12s  (attempt 1/3): 100%|██████████| 100/100 [00:12<00:00,  8.20query/s]
Waiting for responses  (attempt 1/3): 100%|██████████| 100/100 [00:01<00:00, 58.66query/s]


In [11]:
# (synchronous seconds, prompto seconds) for OpenAI
sync_times["openai"], prompto_times["openai"]

(126.30979299545288, 13.91887378692627)

## Gemini

In [12]:
# Baseline: send the Gemini prompts with the synchronous helper.
# The prompts are loaded before the clock starts so that only the API calls are
# timed, mirroring the prompto cell, which builds its Experiment before timing.
gemini_prompt_dicts = load_prompt_dicts(GEMINI_EXPERIMENT_FILE)

# perf_counter() is monotonic and high-resolution, so the measured interval cannot
# be skewed by system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
gemini_sync = send_prompts_sync(prompt_dicts=gemini_prompt_dicts)
sync_times["gemini"] = time.perf_counter() - start

100%|██████████| 100/100 [02:43<00:00,  1.63s/it]


In [13]:
gemini_experiment = Experiment(
    file_name="gemini.jsonl", settings=Settings(data_folder="./data", max_queries=500)
)

start = time.time()
gemini_responses, _ = await gemini_experiment.process()
prompto_times["gemini"] = time.time() - start

Sending 100 queries at 500 QPM with RI of 0.12s  (attempt 1/3):   0%|          | 0/100 [00:00<?, ?query/s]

Sending 100 queries at 500 QPM with RI of 0.12s  (attempt 1/3): 100%|██████████| 100/100 [00:12<00:00,  8.17query/s]
Waiting for responses  (attempt 1/3): 100%|██████████| 100/100 [00:01<00:00, 55.77query/s]


In [14]:
# (synchronous seconds, prompto seconds) for Gemini
sync_times["gemini"], prompto_times["gemini"]

(163.48729801177979, 14.094270944595337)

## Ollama

In [15]:
# Warm-up request sent before timing anything against Ollama. Per the Ollama API
# docs, a generate request without a prompt just loads the model into memory, so
# the model load is not attributed to whichever benchmark happens to run first.
#
# os.environ[...] (rather than .get) fails immediately with a KeyError when the
# endpoint is not configured, instead of posting to the bogus URL "None/api/generate".
# A trailing slash on the endpoint is dropped so the path is not doubled up.
ollama_endpoint = os.environ["OLLAMA_API_ENDPOINT"].rstrip("/")

warmup_response = requests.post(
    f"{ollama_endpoint}/api/generate",
    json={"model": "llama3"},
    # Generous limit: loading the model can take a while, but the notebook should
    # not hang forever if the server is unreachable.
    timeout=300,
)
# Surface HTTP errors (e.g. the model has not been pulled) here rather than midway
# through the timed runs.
warmup_response.raise_for_status()
warmup_response

<Response [200]>

In [16]:
# Baseline: send the Ollama prompts with the synchronous helper.
# The prompts are loaded before the clock starts so that only the API calls are
# timed, mirroring the prompto cell, which builds its Experiment before timing.
ollama_prompt_dicts = load_prompt_dicts(OLLAMA_EXPERIMENT_FILE)

# perf_counter() is monotonic and high-resolution, so the measured interval cannot
# be skewed by system clock adjustments the way a time.time() difference can.
start = time.perf_counter()
ollama_sync = send_prompts_sync(prompt_dicts=ollama_prompt_dicts)
sync_times["ollama"] = time.perf_counter() - start

100%|██████████| 100/100 [04:31<00:00,  2.71s/it]


In [17]:
ollama_experiment = Experiment(
    file_name="ollama.jsonl", settings=Settings(data_folder="./data", max_queries=40)
)

start = time.time()
ollama_responses, _ = await ollama_experiment.process()
prompto_times["ollama"] = time.time() - start

Sending 100 queries at 40 QPM with RI of 1.5s  (attempt 1/3): 100%|██████████| 100/100 [02:30<00:00,  1.50s/query]
Waiting for responses  (attempt 1/3): 100%|██████████| 100/100 [01:58<00:00,  1.18s/query]


In [18]:
# (synchronous seconds, prompto seconds) for Ollama
sync_times["ollama"], prompto_times["ollama"]

(271.4494206905365, 268.59372997283936)

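## Analysis

In the run recorded here, `prompto` processed the 100 OpenAI prompts about 9× faster than the synchronous loop (≈14 s vs. ≈126 s) and the 100 Gemini prompts about 12× faster (≈14 s vs. ≈163 s). For Ollama the two approaches took essentially the same time (≈269 s vs. ≈271 s); a likely explanation, not verified here, is that the local Ollama server works through requests one at a time, so sending them concurrently cannot shorten the total.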

In [19]:
# Elapsed seconds per API for the synchronous baseline
sync_times

{'openai': 126.30979299545288,
 'gemini': 163.48729801177979,
 'ollama': 271.4494206905365}

In [20]:
# Elapsed seconds per API for prompto
prompto_times

{'openai': 13.91887378692627,
 'gemini': 14.094270944595337,
 'ollama': 268.59372997283936}