# Tokens 구조와 사용량에 대한 이해

이 노트북은 gpt-35-turbo 모델이 배포되어 있을 때 정상적으로 동작합니다.  
gpt-4o와 gpt-4o mini를 위한 새로운 토크나이저를 활용하는 예제가 추가되었습니다.

In [1]:
import os
from openai import AzureOpenAI
import tiktoken
from dotenv import load_dotenv
load_dotenv()

# Connection settings for the Azure OpenAI client are read from environment
# variables; surrounding whitespace on the endpoint value is stripped.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", "").strip(),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("OPENAI_API_VERSION"),
)

# Deployment name passed as `model` in the chat request below. It is fixed
# here; the DEPLOYMENT_NAME environment variable is an alternative source.
deployment_name = "gpt-35-turbo"

# Tokenizer used for the local token counts in the following cells.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

In [2]:
prompt = "Azure OpenAI service is General Available now!"

# Encode the prompt into token ids and report the count and the raw ids.
tokens = encoding.encode(prompt)
print('Total number of tokens:', len(tokens))
print('Tokens :', tokens)

# Decode each id on its own to show the text fragment it stands for.
words = [encoding.decode([token_id]) for token_id in tokens]
print('Words : ', words)

Total number of tokens: 9
Tokens : [79207, 5377, 15836, 2532, 374, 3331, 16528, 1457, 0]
Words :  ['Azure', ' Open', 'AI', ' service', ' is', ' General', ' Available', ' now', '!']


In [3]:
# Conversation sent to the model: a fixed system instruction followed by the
# prompt defined in the previous cell.
chat_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# n=2 asks for two completions of the same conversation; max_tokens=60 is the
# length limit passed along with the request.
response = client.chat.completions.create(
    model=deployment_name,
    messages=chat_messages,
    n=2,
    max_tokens=60,
    temperature=0.7,
)

# Show 2 returned results

In [4]:
# Print every returned completion together with its locally computed token
# count (measured with the same `encoding` used for the prompt).
for answer_no, choice in enumerate(response.choices, start=1):
    # `content` may be None (e.g. when the service returns no text for a
    # choice); fall back to an empty string so the token count does not raise.
    assistant_msg = choice.message.content or ""
    print('='*30, f'ANSWER #{answer_no}', '='*30)
    print('Completion_tokens:', len(encoding.encode(assistant_msg)))
    print(assistant_msg)

Completion_tokens: 60
That's great news! The Azure OpenAI service provides a range of powerful AI capabilities, including natural language processing, machine learning, and computer vision. With this service, developers can easily incorporate AI capabilities into their applications without having to build and train their own models from scratch. This can save a lot
Completion_tokens: 60
That's great news! Azure OpenAI service is an AI platform that allows developers to build and deploy AI models at scale. With its general availability, developers can now access and use its features like Natural Language Processing (NLP), Machine Learning, Computer Vision, and more to develop intelligent applications that can


# Usage

In [5]:
# Display the usage record attached to the response: completion, prompt and
# total token counts as reported for the request above.
response.usage

CompletionUsage(completion_tokens=120, prompt_tokens=28, total_tokens=148)

In [6]:
prompt = "안녕하세요 저는 한국 사람입니다"

# Tokenize the Korean prompt with the same encoding as before.
tokens = encoding.encode(prompt)
print('Total number of tokens:', len(tokens))
print('Tokens :', tokens)

# Decode ids one at a time; some ids print as a replacement character on
# their own, presumably because they hold only part of a character's bytes.
words = [encoding.decode([token_id]) for token_id in tokens]
print('Words : ', words)

Total number of tokens: 15
Tokens : [31495, 230, 75265, 243, 92245, 19097, 222, 16969, 62398, 89059, 255, 33229, 39519, 234, 80052]
Words :  ['�', '�', '�', '�', '하세요', ' �', '�', '는', ' 한', '�', '�', ' 사', '�', '�', '입니다']


## GPT-4o & GPT-4o mini의 개선된 Tokenizer

In [7]:
# Load the o200k_base encoding by name and sanity-check it with an
# encode/decode round trip.
enc = tiktoken.get_encoding("o200k_base")
sample = "hello world"
assert enc.decode(enc.encode(sample)) == sample

# A tokenizer can also be looked up from an OpenAI model name; this lookup
# replaces the instance created above.
enc = tiktoken.encoding_for_model("gpt-4o")

In [8]:
# Tokenize the current prompt again, this time with `enc`, so the result can
# be compared with the earlier token count.
tokens = enc.encode(prompt)
print('Total number of tokens:', len(tokens))
print('Tokens :', tokens)

# Decode each id separately to show how the text was split.
words = [enc.decode([token_id]) for token_id in tokens]
print('Words : ', words)

Total number of tokens: 6
Tokens : [14307, 171731, 199090, 52971, 34014, 27001]
Words :  ['안', '녕하세요', ' 저는', ' 한국', ' 사람', '입니다']
