In [1]:
import tiktoken

In [2]:
# encoding = tiktoken.get_encoding("cl100k_base")
encoding = tiktoken.encoding_for_model("gpt-4-turbo")

In [3]:
def extract_encodings(example_string: str, model_name: str) -> None:

    print(f'\nExample string: "{example_string}"')
    # for each encoding, print the # of tokens, the token integers, and the token bytes
    try:
        encoding = tiktoken.get_encoding(model_name)
    except:
        encoding = tiktoken.get_encoding("cl100k_base")
    
    token_integers = encoding.encode(example_string)
    num_tokens = len(token_integers)
    token_bytes = [encoding.decode_single_token_bytes(token) for token in token_integers]
    print(f"{num_tokens} tokens")
    print(f"token integers: {token_integers}")
    print(f"token bytes: {token_bytes}")


In [4]:
extract_encodings("Hello, my name is Gieun", 'gpt-4-turbo-preview')


Example string: "Hello, my name is Gieun"
8 tokens
token integers: [9906, 11, 856, 836, 374, 480, 648, 359]
token bytes: [b'Hello', b',', b' my', b' name', b' is', b' G', b'ie', b'un']


In [5]:
extract_encodings("""Dawa from Arun Treks also gave oxygen to David and tried to help him move, 
                  repeatedly, for perhaps an hour. But he could not get David to stand alone or even stand to 
                  rest on his shoulders, and crying, Dawa had to leave him too. 
                  Even with two Sherpas, it was not going to be possible to get David down the tricky sections below""",
                  'gpt-4-turbo-preview')


Example string: "Dawa from Arun Treks also gave oxygen to David and tried to help him move, 
                  repeatedly, for perhaps an hour. But he could not get David to stand alone or even stand to 
                  rest on his shoulders, and crying, Dawa had to leave him too. 
                  Even with two Sherpas, it was not going to be possible to get David down the tricky sections below"
82 tokens
token integers: [35, 14406, 505, 1676, 359, 12758, 2857, 1101, 6688, 24463, 311, 6941, 323, 6818, 311, 1520, 1461, 3351, 11, 720, 1733, 19352, 11, 369, 8530, 459, 6596, 13, 2030, 568, 1436, 539, 636, 6941, 311, 2559, 7636, 477, 1524, 2559, 311, 720, 1733, 2800, 389, 813, 28004, 11, 323, 31299, 11, 423, 14406, 1047, 311, 5387, 1461, 2288, 13, 720, 1733, 7570, 449, 1403, 17530, 42201, 11, 433, 574, 539, 2133, 311, 387, 3284, 311, 636, 6941, 1523, 279, 34553, 14491, 3770]
token bytes: [b'D', b'awa', b' from', b' Ar', b'un', b' Tre', b'ks', b' also', b' gave', b' oxygen', b' to', b' 

## Chat completion의 token count

In [6]:
import os
import openai

os.environ['OPENAI_API_KEY']= "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"
openai.api_key = os.environ["OPENAI_API_KEY"]

In [7]:
def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Returns the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")

    num_tokens = 0
    for message in messages:
        num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":  # if there's a name, the role is omitted
                num_tokens += -1  # role is always required and always 1 token
    num_tokens += 2  # every reply is primed with <im_start>assistant
    return num_tokens

In [8]:
messages = [
  {"role": "system", "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English."},
  {"role": "system", "name":"example_user", "content": "New synergies will help drive top-line growth."},
  {"role": "system", "name": "example_assistant", "content": "Things working well together will increase revenue."},
  {"role": "system", "name":"example_user", "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage."},
  {"role": "system", "name": "example_assistant", "content": "Let's talk later when we're less busy about how to do better."},
  {"role": "user", "content": "This late pivot means we don't have time to boil the ocean for the client deliverable."},
]

model = "gpt-4-turbo-preview"

print(f"{num_tokens_from_messages(messages, model)} prompt tokens counted.")

126 prompt tokens counted.


In [9]:
from openai import OpenAI
client = OpenAI()

response = client.chat.completions.create(
  model=model,
  messages=messages,
  temperature=0,
)

print(f'{response.usage.prompt_tokens} prompt tokens used.')

129 prompt tokens used.


In [10]:
response.usage

CompletionUsage(completion_tokens=16, prompt_tokens=129, total_tokens=145)

In [11]:
print("Input token count : {}, cost : {}".format(response.usage.prompt_tokens, 10 * (response.usage.prompt_tokens/1000000)))
print("Input token count : {}, cost : {}".format(response.usage.completion_tokens, 30 * (response.usage.completion_tokens/1000000)))

Input token count : 129, cost : 0.00129
Input token count : 16, cost : 0.00047999999999999996


- short prompt -> generate long completion
- long prompt -> generate short compeltion

### Limit tokens

In [14]:
response = client.chat.completions.create(
  model=model,
  messages=messages,
  temperature=0,
  max_tokens=5
)

print(f'{response.usage} prompt tokens used.')

CompletionUsage(completion_tokens=5, prompt_tokens=129, total_tokens=134) prompt tokens used.


In [15]:
response.choices[0].message.content

'Changing direction now means we'

In [13]:
response.choices[0].message.content

"Changing direction now means we can't do everything for the client's project."

---

## Agent의 token count

In [16]:
import tiktoken
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("gpt-4-turbo-preview").encode
)

Settings.llm = OpenAI(model="gpt-4-turbo-preview", temperature=0)
Settings.callback_manager = CallbackManager([token_counter])

In [17]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex

documents = SimpleDirectoryReader("sample").load_data()

index = VectorStoreIndex.from_documents(documents)

token_counter.reset_counts()

In [18]:
token_counter.reset_counts()

query_engine = index.as_query_engine(similarity_top_k=4)
response = query_engine.query("Summarize the text for me.")


In [19]:
response

Response(response='The text provides a collection of references and citations related to Mount Everest, covering various topics such as notable climbing events, research studies, and editorial opinions. It mentions specific incidents, achievements, and tragedies associated with Everest expeditions, including a high-altitude rescue in 2007, climbing seasons, and the impact of altitude on the brain. Additionally, it references discussions on the ethics of climbing Everest, the dangers faced by Sherpas, and the environmental and human challenges of high-altitude mountaineering. The text also lists categories related to article reliability, sourcing, and specific content types on Wikipedia, indicating a focus on the documentation and verification of information related to Mount Everest and related topics.', source_nodes=[NodeWithScore(node=TextNode(id_='4d042b9a-0449-4791-8118-d49597d3e183', embedding=None, metadata={'file_path': '/Users/gieunkwak/Data_Analytics/fastcampus_강의자료/repo/0

In [22]:
print(
    "Embedding Tokens: ",
    token_counter.total_embedding_token_count,
    "\n",
    "LLM Prompt Tokens: ",
    token_counter.prompt_llm_token_count,
    "\n",
    "LLM Completion Tokens: ",
    token_counter.completion_llm_token_count,
    "\n",
    "Total LLM Token Count: ",
    token_counter.total_llm_token_count,
    "\n",
)


Embedding Tokens:  0 
 LLM Prompt Tokens:  0 
 LLM Completion Tokens:  0 
 Total LLM Token Count:  0 



In [21]:
token_counter.reset_counts()