# Token Estimator

A notebook to aid with cost estimation for this benchmark. We estimate the number of tokens with GPT-4o's tokenizer and then compute the API costs for various models based on current rates.

In [1]:
# We run the notebook from the top level of the repo, so move the working directory up one level.
# NOTE: this cell is not idempotent. `%cd ..` is relative to the current working directory, so
# re-running it without restarting the kernel moves up one more directory each time.
%cd ..

/home/sherbold/git/sortbench


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import tiktoken

from sortbench.util.data_utils import load_data_local

# GPT-4o's tokenizer is the reference tokenizer for the token estimates in this cell.
enc = tiktoken.encoding_for_model("gpt-4o")

# Benchmark modes to load and the running token count for each of them.
modes = ["basic", "advanced", "debug"]
tokens_per_mode = {}

for mode in modes:
    # presumably a nested mapping: config name -> {list name -> list}; only .values() is used here
    configs = load_data_local(file_path="benchmark_data", name="sortbench", mode=mode, version="v1.0")
    # Initialise the counter before iterating so the summary line below also works
    # (and reports 0) for a mode that contains no lists at all.
    tokens_per_mode[mode] = 0
    for config in configs.values():
        # `lst` instead of `list`: the loop variable lives on in the kernel namespace and
        # would otherwise shadow the builtin for every later cell.
        for lst in config.values():
            # Count the tokens of the list's string representation.
            tokens_per_mode[mode] += len(enc.encode(f"{lst}"))
    print(f"Total tokens for {mode} mode: {tokens_per_mode[mode]}")

# Grand total over all modes; the cost estimates are based on this number.
total_tokens = sum(tokens_per_mode.values())
print(f"Total tokens: {total_tokens}")
print()

def _print_model_costs(rates_by_model, n_tokens, tokens_per_rate_unit):
    """Print the estimated costs per model and return the sum over all models.

    The same token count is charged once at the input rate and once at the
    output rate of every model.

    Args:
        rates_by_model: mapping of model name -> {"input": rate, "output": rate}.
        n_tokens: number of tokens to charge for.
        tokens_per_rate_unit: number of tokens one rate unit refers to
            (e.g. 1_000_000 for rates quoted per million tokens).

    Returns:
        The summed total costs (input + output) of all models.
    """
    total = 0
    for model, rates in rates_by_model.items():
        costs_input = (n_tokens * rates['input']) / tokens_per_rate_unit
        costs_output = (n_tokens * rates['output']) / tokens_per_rate_unit
        costs_model = costs_input + costs_output
        total += costs_model

        print(f"Model: {model}")
        print(f"Input costs: {costs_input}")
        print(f"Output costs: {costs_output}")
        print(f"Total costs: {costs_model}")
        print("")
    return total


# Cloud rates are applied per 1,000,000 tokens.
TOKENS_PER_CLOUD_RATE_UNIT = 1_000_000
# Local rates are applied per single token, exactly as in the original computation.
# NOTE(review): with this unit the recorded output reports a total of 1198.35 for llama3.1
# versus 7.01 for gpt-4o, which looks implausible. Presumably these rates are quoted per
# 1,000 tokens -- confirm the source of the rates and set this constant to 1_000 if so.
TOKENS_PER_LOCAL_RATE_UNIT = 1

model_costs = {"gpt-4o": {"input": 2.0, "output": 10.0},
               "gpt-o1": {"input": 15.0, "output": 60.0},
               "gpt-4o-mini": {"input": 0.15, "output": 0.6},
               "gpt-3.5-turbo": {"input": 3.0, "output": 6.0},
               "claude-opus": {"input": 15.0, "output": 75.0},
               "claude-sonnet": {"input": 3.0, "output": 15.0},
               "claude-haiku": {"input": 0.8, "output": 4.0},
               }

total_costs = _print_model_costs(model_costs, total_tokens, TOKENS_PER_CLOUD_RATE_UNIT)
print(f"Total costs (cloud): {total_costs}")
print()

# NOTE(review): the key "qwen2.5:" ends with a colon, which looks like a typo. It is only
# used as the printed label here, so it is left untouched.
local_model_costs = {"llama3.1": {"input": 0.00085, "output": 0.0012},
                     "gemma2": {"input": 0.00040, "output": 0.0006},
                     "qwen2.5:": {"input": 0.00085, "output": 0.0012},
                     "deepseekr1": {"input": 0.000421, "output": 0.000520}
                     }

total_costs_local = _print_model_costs(local_model_costs, total_tokens, TOKENS_PER_LOCAL_RATE_UNIT)

print(f"Total costs (local): {total_costs_local}")

Total tokens for basic mode: 88430
Total tokens for advanced mode: 319621
Total tokens for debug mode: 176512
Total tokens: 584563

Model: gpt-4o
Input costs: 1.169126
Output costs: 5.84563
Total costs: 7.014756

Model: gpt-o1
Input costs: 8.768445
Output costs: 35.07378
Total costs: 43.842225

Model: gpt-4o-mini
Input costs: 0.08768445
Output costs: 0.3507378
Total costs: 0.43842225

Model: gpt-3.5-turbo
Input costs: 1.753689
Output costs: 3.507378
Total costs: 5.261067000000001

Model: claude-opus
Input costs: 8.768445
Output costs: 43.842225
Total costs: 52.61067

Model: claude-sonnet
Input costs: 1.753689
Output costs: 8.768445
Total costs: 10.522134

Model: claude-haiku
Input costs: 0.4676504
Output costs: 2.338252
Total costs: 2.8059024000000004

Total costs (cloud): 122.49517664999999

Model: llama3.1
Input costs: 496.87854999999996
Output costs: 701.4756
Total costs: 1198.35415

Model: gemma2
Input costs: 233.82520000000002
Output costs: 350.7378
Total costs: 584.563

Model: qw

In [18]:
from transformers import AutoTokenizer

import os

# Access token for the gated Hugging Face repositories. It is read from the environment so
# that no credential is stored in the notebook. If HF_TOKEN is unset, None is passed on.
# SECURITY: a token was previously hard-coded at this place and must be considered leaked;
# revoke it in the Hugging Face account settings.
hf_access_token = os.environ.get("HF_TOKEN")

llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-70B-Instruct", token=hf_access_token)
gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b", token=hf_access_token)
qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-72B-Instruct", token=hf_access_token)
deepseekr_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Llama-70B", token=hf_access_token)


def _hf_token_counter(tokenizer):
    """Return a function that counts the tokens `tokenizer` produces for a string."""
    # Plain Python ids are enough for counting; no tensors (and hence no torch) are needed.
    return lambda text: len(tokenizer(text).input_ids)


# One token-counting function per tokenizer, keyed by the suffix of the result variables.
token_counters = {
    "gpt": lambda text: len(enc.encode(text)),
    "llama": _hf_token_counter(llama_tokenizer),
    "gemma": _hf_token_counter(gemma_tokenizer),
    "qwen": _hf_token_counter(qwen_tokenizer),
    "deepseekr": _hf_token_counter(deepseekr_tokenizer),
}

# Per tokenizer: the largest token count seen so far and the list that produced it.
max_lengths = {name: 0 for name in token_counters}
longest_lists = {name: None for name in token_counters}

for mode in modes:
    configs = load_data_local(file_path="benchmark_data", name="sortbench", mode=mode, version="v1.0")
    for config in configs.values():
        # `lst` instead of `list`, so the builtin is not shadowed in the kernel namespace.
        for lst in config.values():
            lst_str = f"{lst}"
            for name, count_tokens in token_counters.items():
                length = count_tokens(lst_str)
                # Strict comparison: on ties the first list encountered is kept.
                if length > max_lengths[name]:
                    max_lengths[name] = length
                    longest_lists[name] = lst

# Expose the results under the flat names as well, for the report below and for any
# later cell that refers to them.
max_length_gpt = max_lengths["gpt"]
max_length_llama = max_lengths["llama"]
max_length_qwen = max_lengths["qwen"]
max_length_deepseekr = max_lengths["deepseekr"]
max_length_gemma = max_lengths["gemma"]
longest_list_gpt = longest_lists["gpt"]
longest_list_llama = longest_lists["llama"]
longest_list_qwen = longest_lists["qwen"]
longest_list_deepseekr = longest_lists["deepseekr"]
longest_list_gemma = longest_lists["gemma"]


print(f"Max length GPT: {max_length_gpt}")
print(f"Longest list: {longest_list_gpt}")
print(f"Max length LLAMA: {max_length_llama}")
print(f"Longest list: {longest_list_llama}")
print(f"Max length Gemma: {max_length_gemma}")
print(f"Longest list: {longest_list_gemma}")
print(f"Max length Qwen: {max_length_qwen}")
print(f"Longest list: {longest_list_qwen}")
# Character count of the longest list (by Qwen tokens), to relate characters to tokens.
str_lst = f"{longest_list_qwen}"
print(f"Num characters: {len(str_lst)}")
print(f"Max length DeepSeekr: {max_length_deepseekr}")
print(f"Longest list: {longest_list_deepseekr}")

Max length GPT: 3149
Longest list: [8.297948137308201e-05, 3.771424684448257e-05, 5.3242791247233915e-05, 5.512147341341556e-05, 8.12902343596352e-05, 1.23001641476973e-06, 5.130468382170117e-06, 9.585281075687535e-05, 8.183328099796325e-05, 4.6569529385289377e-05, 7.626372878271445e-05, 3.066825616966281e-05, 8.995208475457035e-05, 2.2314281827203275e-05, 9.910264686755343e-05, 3.607528669590859e-05, 1.0450790893294105e-05, 9.806122666673979e-05, 6.626006068517084e-05, 1.4138634237646442e-05, 8.861797757289328e-05, 6.808049113078361e-05, 3.425990181277183e-05, 8.774392625899877e-05, 9.715966064853417e-05, 4.754074122559749e-06, 1.5158680648786883e-05, 2.0394114963000167e-05, 3.304018564243384e-05, 5.687407319327985e-05, 9.814792664501214e-05, 3.317906332176446e-05, 9.969559133414597e-05, 9.72319321547541e-05, 1.2692366256595544e-05, 8.574632502690474e-05, 6.0108381541957684e-05, 3.2056735124533145e-05, 6.727445667753832e-05, 9.097746364219084e-05, 4.435001866492493e-05, 8.269367427586