## 计算openai中调用大模型的token数

详细介绍请参考 OpenAI Cookbook 中的这篇 notebook：https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

### tiktoken下载

In [26]:
# Install (or upgrade to) the latest tiktoken and openai packages via shell
# magics; -q keeps pip's output quiet.
! pip install --upgrade tiktoken -q
! pip install --upgrade openai -q

In [27]:
import tiktoken

### encoding

tiktoken 提供两种获取编码器（encoding）的方式：一种是根据编码名称（encoding name）获取，另一种是根据 OpenAI 模型名称获取。

In [28]:
# Way 1: load an encoding directly by its encoding name.
encoding = tiktoken.get_encoding("cl100k_base")
# encode() turns the text into a list of integer token ids (shown as the cell output).
encoding.encode("tiktoken is great!")

[83, 1609, 5963, 374, 2294, 0]

In [29]:
# Way 2: let tiktoken pick the encoding that belongs to a given model name.
encoding = tiktoken.encoding_for_model("gpt-4o-mini")
# Same text as in the cl100k_base cell, but the resulting token ids differ
# because this model resolves to a different encoding.
encoding.encode("tiktoken is great!")

[83, 8251, 2488, 382, 2212, 0]

### decoding

In [30]:
# Use the encoding that belongs to the model the token ids were produced with.
encoding = tiktoken.encoding_for_model("gpt-4o-mini")
# decode() maps a list of token ids back to the original string.
encoding.decode([83, 8251, 2488, 382, 2212, 0])

'tiktoken is great!'

In [31]:
# Decode each token id on its own to see the bytes that each token covers.
# Relies on `encoding` bound in the previous cell.
[encoding.decode_single_token_bytes(token) for token in [83, 8251, 2488, 382, 2212, 0]]

[b't', b'ikt', b'oken', b' is', b' great', b'!']

### 计算token数（api calls）

In [32]:
def num_tokens_from_messages(messages, model="gpt-4o-mini-2024-07-18"):
    """Count the prompt tokens that a list of chat messages adds up to.

    Args:
        messages: Iterable of message dicts (e.g. with ``role``, ``content``
            and optionally ``name`` keys). Every value is passed to the
            encoding's ``encode`` method.
        model: Model name. Pinned snapshot names are counted directly; names
            that only contain a known family name (such as ``gpt-4o``) print
            a warning and are counted as that family's pinned snapshot.

    Returns:
        The total token count as an int, including the per-message overhead
        and the 3 tokens that prime the assistant reply.

    Raises:
        NotImplementedError: If ``model`` is neither a pinned snapshot nor a
            member of a known model family.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using o200k_base encoding.")
        encoding = tiktoken.get_encoding("o200k_base")

    pinned_snapshots = {
        "gpt-3.5-turbo-0125",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
        "gpt-4o-mini-2024-07-18",
        "gpt-4o-2024-08-06",
    }

    if model not in pinned_snapshots:
        # (family substring, warning text, snapshot to count as). Order
        # matters: the more specific family names must be tried first.
        family_fallbacks = (
            (
                "gpt-3.5-turbo",
                "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.",
                "gpt-3.5-turbo-0125",
            ),
            (
                "gpt-4o-mini",
                "Warning: gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-mini-2024-07-18.",
                "gpt-4o-mini-2024-07-18",
            ),
            (
                "gpt-4o",
                "Warning: gpt-4o and gpt-4o-mini may update over time. Returning num tokens assuming gpt-4o-2024-08-06.",
                "gpt-4o-2024-08-06",
            ),
            (
                "gpt-4",
                "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.",
                "gpt-4-0613",
            ),
        )
        for family, warning, snapshot in family_fallbacks:
            if family in model:
                print(warning)
                return num_tokens_from_messages(messages, model=snapshot)
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}."""
        )

    # Fixed overhead used for every pinned snapshot listed above.
    tokens_per_message = 3
    tokens_per_name = 1

    total = 0
    for message in messages:
        encoded_length = sum(len(encoding.encode(text)) for text in message.values())
        name_overhead = tokens_per_name if "name" in message else 0
        total += tokens_per_message + encoded_length + name_overhead

    # every reply is primed with <|start|>assistant<|message|>
    return total + 3

In [33]:
# Sanity check: for several models, compare the local count produced by
# num_tokens_from_messages() with the prompt token usage the OpenAI API reports.

from openai import OpenAI
import os

# Reads the key from the environment; falls back to a placeholder string.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your_api_key"))

example_messages = [
    dict(
        role="system",
        content="You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
    ),
    dict(
        role="user",
        content="This late pivot means we don't have time to boil the ocean for the client deliverable.",
    ),
]

models_to_check = ("gpt-3.5-turbo", "gpt-4-0613", "gpt-4", "gpt-4o", "gpt-4o-mini")

for model in models_to_check:
    print(model)

    # Local estimate from the function defined above.
    local_count = num_tokens_from_messages(example_messages, model)
    print(f"{local_count} prompt tokens counted by num_tokens_from_messages().")

    # Reference value: send the same messages to the API and read its usage report.
    request_args = dict(
        model=model,
        messages=example_messages,
        temperature=0,
        max_tokens=1000,
    )
    response = client.chat.completions.create(**request_args)
    print(f"response: {response}")
    print(f'{response.usage.prompt_tokens} prompt tokens counted by the OpenAI API.')
    print()

gpt-3.5-turbo
48 prompt tokens counted by num_tokens_from_messages().
response: ChatCompletion(id='chatcmpl-AHli5UCYXaGPCL45mbPfELAgvKNDx', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="This sudden change in direction means we don't have enough time to complete an extensive project for the client.", refusal=None, role='assistant', function_call=None, tool_calls=None))], created=1728798337, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=21, prompt_tokens=48, total_tokens=69, completion_tokens_details=CompletionTokensDetails(audio_tokens=None, reasoning_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)))
48 prompt tokens counted by the OpenAI API.

gpt-4-0613
48 prompt tokens counted by num_tokens_from_messages().
response: ChatCompletion(id='chatcmpl-AHli61gpleKaztTx2yJXkk0DsvsZa', choices=[Choice(finish_

### 计算 token 数及对应成本（api calls）

In [34]:
def _chat_overhead_for_model(model):
    """Return ``(tokens_per_message, tokens_per_name)`` for ``model``.

    Names that only contain a known family name print a warning and use the
    overhead of that family's pinned snapshot. Raises ``NotImplementedError``
    for any other model name.
    """
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-3.5-turbo-0125",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
        "gpt-4o",
    }:
        return 3, 1
    if model == "gpt-3.5-turbo-0301":
        # every message follows <|start|>{role/name}\n{content}<|end|>\n;
        # if there's a name, the role is omitted (hence the -1).
        return 4, -1
    if "gpt-3.5-turbo" in model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return _chat_overhead_for_model("gpt-3.5-turbo-0613")
    if "gpt-4" in model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return _chat_overhead_for_model("gpt-4-0613")
    raise NotImplementedError(
        f"""num_tokens_from_messages() is not implemented for model {model}."""
    )


def num_tokens_from_messages(
    messages,
    model="gpt-3.5-turbo-0613",
    is_input=True,
    input_pricing=0.5 / 1000000,
    output_pricing=1.5 / 1000000,
):
    """Count the tokens of a chat prompt or completion and estimate its cost.

    Args:
        messages: When ``is_input`` is true, a list of chat message dicts
            whose values are all strings (e.g. ``role``, ``content``,
            ``name``). When ``is_input`` is false, the completion text as a
            single string.
        model: Model name. It selects both the per-message overhead and the
            tiktoken encoding. The encoding is always resolved from this
            name, even when the overhead falls back to a pinned snapshot.
        is_input: True to count prompt tokens (with chat formatting
            overhead), False to count the tokens of a completion string.
        input_pricing: Price per prompt token. Defaults to 0.5 per million
            tokens.
        output_pricing: Price per completion token. Defaults to 1.5 per
            million tokens.

    Returns:
        A ``(num_tokens, cost)`` tuple, where ``cost`` is ``num_tokens``
        multiplied by the price matching ``is_input``.

    Raises:
        NotImplementedError: If ``model`` is not a supported model name. This
            is checked before any encoding is loaded.
    """
    # Validate the model first so an unsupported name fails fast, before
    # tiktoken is asked to load an encoding.
    tokens_per_message, tokens_per_name = _chat_overhead_for_model(model)

    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using o200k_base encoding.")
        encoding = tiktoken.get_encoding("o200k_base")

    if not is_input:
        # A completion is plain text: no per-message or reply-priming overhead.
        num_tokens = len(encoding.encode(messages))
        return num_tokens, num_tokens * output_pricing

    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens, num_tokens * input_pricing