# `prompto` vs. synchronous Python for loop

In [1]:
import time
import os
import requests
from dotenv import load_dotenv

from prompto.settings import Settings
from prompto.experiment import Experiment

from api_utils import send_prompts_sync
from dataset_utils import load_prompt_dicts, load_prompts, generate_experiment_1_file

# Load environment variables from the local ".env" file before any API call is
# made; a later cell reads OLLAMA_API_ENDPOINT from the environment.
# NOTE(review): presumably the API keys for the hosted providers are supplied
# by this file as well — confirm.
load_dotenv(dotenv_path=".env")

True

In [2]:
# Prompts shared by all three experiment files generated below.
# NOTE(review): the variable name suggests these come from the Alpaca dataset — confirm.
alpaca_prompts = load_prompts("./sample_prompts.json")

In [3]:
# One experiment file per API, all stored in the same input directory.
_INPUT_DIR = "./data/input"

OPENAI_EXPERIMENT_FILE = f"{_INPUT_DIR}/openai.jsonl"
GEMINI_EXPERIMENT_FILE = f"{_INPUT_DIR}/gemini.jsonl"
OLLAMA_EXPERIMENT_FILE = f"{_INPUT_DIR}/ollama.jsonl"

In [4]:
# Generate the OpenAI experiment file from the shared prompts.
# Generation parameters passed through for the "openai" API.
openai_params = dict(n=1, temperature=0.9, max_tokens=100)

generate_experiment_1_file(
    api="openai",
    model_name="gpt-3.5-turbo",
    params=openai_params,
    prompts=alpaca_prompts,
    path=OPENAI_EXPERIMENT_FILE,
)

In [23]:
# Generate the Gemini experiment file from the shared prompts.
# Generation parameters passed through for the "gemini" API.
gemini_params = dict(candidate_count=1, temperature=0.9, max_output_tokens=100)

generate_experiment_1_file(
    api="gemini",
    model_name="gemini-1.5-flash",
    params=gemini_params,
    prompts=alpaca_prompts,
    path=GEMINI_EXPERIMENT_FILE,
)

In [30]:
# Generate the Ollama experiment file from the shared prompts.
# Generation parameters passed through for the "ollama" API.
ollama_params = dict(temperature=0.9, num_predict=100, seed=42)

generate_experiment_1_file(
    api="ollama",
    model_name="llama3",
    params=ollama_params,
    prompts=alpaca_prompts,
    path=OLLAMA_EXPERIMENT_FILE,
)

In [7]:
# Elapsed seconds keyed by API name ("openai", "gemini", "ollama"); the timing
# cells below fill these in, one dict per approach.
sync_times, prompto_times = {}, {}

In [8]:
# Sanity check: report how many prompt dicts each experiment file contains.
for constant_name, experiment_file in (
    ("OPENAI_EXPERIMENT_FILE", OPENAI_EXPERIMENT_FILE),
    ("GEMINI_EXPERIMENT_FILE", GEMINI_EXPERIMENT_FILE),
    ("OLLAMA_EXPERIMENT_FILE", OLLAMA_EXPERIMENT_FILE),
):
    n_prompt_dicts = len(load_prompt_dicts(experiment_file))
    print(f"len(load_prompt_dicts({constant_name})): {n_prompt_dicts}")

len(load_prompt_dicts(OPENAI_EXPERIMENT_FILE)): 50
len(load_prompt_dicts(GEMINI_EXPERIMENT_FILE)): 50
len(load_prompt_dicts(OLLAMA_EXPERIMENT_FILE)): 50


## OpenAI

In [9]:
# Load the prompt dicts before starting the clock so the measurement covers
# only the API calls; the prompto cell likewise constructs its Experiment
# before its timer starts, which keeps the two measurements comparable.
openai_prompt_dicts = load_prompt_dicts(OPENAI_EXPERIMENT_FILE)

# perf_counter() is monotonic, so the interval cannot be skewed by system
# clock adjustments the way a difference of time.time() values can.
start = time.perf_counter()
openai_sync = send_prompts_sync(prompt_dicts=openai_prompt_dicts)
sync_times["openai"] = time.perf_counter() - start

100%|██████████| 50/50 [01:03<00:00,  1.27s/it]


In [10]:
openai_experiment = Experiment(
    file_name="openai.jsonl", settings=Settings(data_folder="./data", max_queries=300)
)

start = time.time()
openai_responses, _ = await openai_experiment.process()
prompto_times["openai"] = time.time() - start

Sending 50 queries at 300 QPM with RI of 0.2s  (attempt 1/3): 100%|██████████| 50/50 [00:10<00:00,  4.95query/s]
Waiting for responses  (attempt 1/3): 100%|██████████| 50/50 [00:11<00:00,  4.38query/s] 


In [11]:
# Display (synchronous seconds, prompto seconds) for OpenAI.
sync_times["openai"], prompto_times["openai"]

(63.54397201538086, 21.541850090026855)

## Gemini

In [24]:
# Load the prompt dicts before starting the clock so the measurement covers
# only the API calls; the prompto cell likewise constructs its Experiment
# before its timer starts, which keeps the two measurements comparable.
gemini_prompt_dicts = load_prompt_dicts(GEMINI_EXPERIMENT_FILE)

# perf_counter() is monotonic, so the interval cannot be skewed by system
# clock adjustments the way a difference of time.time() values can.
start = time.perf_counter()
gemini_sync = send_prompts_sync(prompt_dicts=gemini_prompt_dicts)
sync_times["gemini"] = time.perf_counter() - start

100%|██████████| 50/50 [01:15<00:00,  1.50s/it]


In [25]:
gemini_experiment = Experiment(
    file_name="gemini.jsonl", settings=Settings(data_folder="./data", max_queries=300)
)

start = time.time()
gemini_responses, _ = await gemini_experiment.process()
prompto_times["gemini"] = time.time() - start

Sending 50 queries at 300 QPM with RI of 0.2s  (attempt 1/3): 100%|██████████| 50/50 [00:10<00:00,  4.96query/s]
Waiting for responses  (attempt 1/3): 100%|██████████| 50/50 [00:01<00:00, 41.28query/s] 


In [26]:
# Display (synchronous seconds, prompto seconds) for Gemini.
sync_times["gemini"], prompto_times["gemini"]

(75.06720781326294, 11.306013822555542)

## Ollama

In [15]:
# Warm-up request sent before any timing: a generate call carrying only the
# model name. NOTE(review): per the Ollama API docs a generate request without
# a prompt loads the model into memory — confirm for the deployed version.

# Index os.environ directly so a missing variable fails here with a KeyError
# instead of silently producing the URL "None/api/generate".
ollama_endpoint = os.environ["OLLAMA_API_ENDPOINT"]

warmup_response = requests.post(
    f"{ollama_endpoint}/api/generate", json={"model": "llama3"}
)
# Stop on an HTTP error: timings taken after a failed warm-up would be misleading.
warmup_response.raise_for_status()
warmup_response

<Response [200]>

In [16]:
# Load the prompt dicts before starting the clock so the measurement covers
# only the API calls; the prompto cell likewise constructs its Experiment
# before its timer starts, which keeps the two measurements comparable.
ollama_prompt_dicts = load_prompt_dicts(OLLAMA_EXPERIMENT_FILE)

# perf_counter() is monotonic, so the interval cannot be skewed by system
# clock adjustments the way a difference of time.time() values can.
start = time.perf_counter()
ollama_sync = send_prompts_sync(prompt_dicts=ollama_prompt_dicts)
sync_times["ollama"] = time.perf_counter() - start

100%|██████████| 50/50 [02:14<00:00,  2.68s/it]


In [31]:
ollama_experiment = Experiment(
    file_name="ollama.jsonl", settings=Settings(data_folder="./data", max_queries=40)
)

start = time.time()
ollama_responses, _ = await ollama_experiment.process()
prompto_times["ollama"] = time.time() - start

Sending 50 queries at 40 QPM with RI of 1.5s  (attempt 1/3): 100%|██████████| 50/50 [01:15<00:00,  1.50s/query]
Waiting for responses  (attempt 1/3): 100%|██████████| 50/50 [00:56<00:00,  1.13s/query]


In [18]:
# Display (synchronous seconds, prompto seconds) for Ollama.
sync_times["ollama"], prompto_times["ollama"]

(134.14817905426025, 131.52019500732422)

## Analysis

In [27]:
# Elapsed seconds per API for the synchronous loop.
sync_times

{'openai': 63.54397201538086,
 'gemini': 75.06720781326294,
 'ollama': 134.14817905426025}

In [32]:
# Elapsed seconds per API for prompto's Experiment.process().
prompto_times

{'openai': 21.541850090026855,
 'gemini': 11.306013822555542,
 'ollama': 131.52019500732422}