# Token Estimator

A notebook to aid with the cost estimates for this benchmark. We estimate the number of tokens using GPT-4o's tokenizer and then compute the API costs for various models based on current rates. The output is assumed to contain the same number of tokens as the input.

In [None]:
# We run the notebook from the top level of the repo: the cells below read
# `benchmark_data` via a relative path (and presumably rely on the working
# directory to import the `sortbench` package -- confirm against the install setup).
# NOTE: `%cd ..` is relative, so re-running this cell without restarting the
# kernel moves up one more directory each time.
%cd ..

In [None]:
import tiktoken

from sortbench.util.data_utils import load_data_local

# Tokenizer used for all estimates; token counts for other models may differ.
enc = tiktoken.encoding_for_model("gpt-4o")


def count_tokens(configs):
    """Return the total number of tokens over every entry of every config.

    `configs` is expected to be a mapping whose values are themselves mappings
    (as returned by `load_data_local`). Each inner value is rendered to text
    with an f-string and tokenized with `enc`; the per-entry token counts are
    summed.
    """
    # The loop variable is deliberately not called `list`: in a notebook that
    # would rebind the builtin for every cell executed afterwards.
    return sum(
        len(enc.encode(f"{data_list}"))
        for config in configs.values()
        for data_list in config.values()
    )


# estimate tokens for basic mode
configs = load_data_local(file_path="benchmark_data", name="sortbench", mode="basic", version="v1.0")
total_tokens_basic = count_tokens(configs)

print(f"Total tokens for basic mode: {total_tokens_basic}")

# estimate tokens for advanced mode
configs = load_data_local(file_path="benchmark_data", name="sortbench", mode="advanced", version="v1.0")
total_tokens_adv = count_tokens(configs)

print(f"Total tokens for advanced mode: {total_tokens_adv}")

print(f"Total tokens: {total_tokens_basic + total_tokens_adv}")
print("")

# Price table per model, applied per 1,000,000 tokens (see the division below).
# NOTE(review): rates are hard-coded (presumably USD) and may be out of date --
# confirm against the provider's current price list before relying on them.
model_costs = {
    "gpt-4o": {"input": 2.0, "output": 10.0},
    "gpt-o1": {"input": 15.0, "output": 60.0},
    "gpt-4o-mini": {"input": 0.15, "output": 0.6},
    "gpt-3.5-turbo": {"input": 3.0, "output": 6.0},
}

# Running sum of the per-model costs, i.e. the price of evaluating every model once.
total_costs = 0
for model_name, rates in model_costs.items():
    input_rate = rates['input']
    output_rate = rates['output']

    # The same token totals are used for input and output: the estimate treats
    # the response as being as long as the prompt data.
    input_cost = (total_tokens_basic * input_rate + total_tokens_adv * input_rate) / 1000000
    output_cost = (total_tokens_basic * output_rate + total_tokens_adv * output_rate) / 1000000
    model_total = input_cost + output_cost
    total_costs += model_total

    # One report per model, followed by a blank separator line.
    report_lines = [
        f"Model: {model_name}",
        f"Input costs: {input_cost}",
        f"Output costs: {output_cost}",
        f"Total costs: {model_total}",
        "",
    ]
    print("\n".join(report_lines))
print(f"Total costs: {total_costs}")

Total tokens for basic mode: 88430
Total tokens for advanced mode: 319621
Total tokens: 408051

Model: gpt-4o
Input costs: 0.17686
Output costs: 0.8843
Total costs: 1.0611599999999999

Model: gpt-o1
Input costs: 1.32645
Output costs: 5.3058
Total costs: 6.632249999999999

Model: gpt-4o-mini
Input costs: 0.0132645
Output costs: 0.053058
Total costs: 0.0663225

Model: gpt-3.5-turbo
Input costs: 0.26529
Output costs: 0.53058
Total costs: 0.7958700000000001

Total costs: 8.555602499999999
