-
Notifications
You must be signed in to change notification settings - Fork 248
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 11 changed files with 2,380 additions and 1,411 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import tiktoken | ||
|
||
# Maximum context window (in tokens) assumed for each supported chat model.
# Both Azure OpenAI ("gpt-35-...") and OpenAI ("gpt-3.5-...") spellings of
# the same model are listed so either naming convention resolves.
MODELS_2_TOKEN_LIMITS = {
    "gpt-35-turbo": 4000,
    "gpt-3.5-turbo": 4000,
    "gpt-35-turbo-16k": 16000,
    "gpt-3.5-turbo-16k": 16000,
    "gpt-4": 8100,
    "gpt-4-32k": 32000,
}

# Translation table from Azure OpenAI model names to the equivalent
# OpenAI model names (used when looking up tiktoken encodings).
AOAI_2_OAI = {
    "gpt-35-turbo": "gpt-3.5-turbo",
    "gpt-35-turbo-16k": "gpt-3.5-turbo-16k",
}
|
||
def getTokenLimit(modelId: str) -> int:
    """Return the token (context window) limit for a known chat model.

    Args:
        modelId: Model name, e.g. "gpt-35-turbo" or "gpt-4".

    Returns:
        int: The maximum number of tokens the model supports.

    Raises:
        ValueError: If modelId is not a key of MODELS_2_TOKEN_LIMITS.
    """
    if modelId not in MODELS_2_TOKEN_LIMITS:
        raise ValueError("Expected model gpt-35-turbo and above")
    # Index directly: membership was just verified, and unlike .get()
    # (which can return None) this honors the declared `int` return type
    # and avoids a second hash lookup.
    return MODELS_2_TOKEN_LIMITS[modelId]
|
||
|
||
def numTokenFromMessages(message: dict[str, str], model: str) -> int:
    """
    Calculate the number of tokens required to encode a message.

    Args:
        message (dict): The message to encode, represented as a dictionary
            (e.g. with "role" and "content" keys).
        model (str): The name of the model to use for encoding.

    Returns:
        int: The total number of tokens required to encode the message.

    Example:
        message = {'role': 'user', 'content': 'Hello, how are you?'}
        model = 'gpt-3.5-turbo'
        numTokenFromMessages(message, model)
        output: 11
    """
    encoding = tiktoken.encoding_for_model(getOaiChatModel(model))
    num_tokens = 2  # For "role" and "content" keys
    # Only the values are encoded; iterate .values() instead of binding an
    # unused key from .items() (the original ignored the key entirely).
    for value in message.values():
        num_tokens += len(encoding.encode(value))
    return num_tokens
|
||
|
||
def getOaiChatModel(aoaimodel: str) -> str:
    """Translate an Azure OpenAI model name to its OpenAI equivalent.

    Names already in OpenAI form (i.e. present in MODELS_2_TOKEN_LIMITS
    but not in AOAI_2_OAI) are returned unchanged.

    Raises:
        ValueError: If the name is empty/None or not a recognized model.
    """
    error_message = "Expected Azure OpenAI ChatGPT model name"
    if aoaimodel == "" or aoaimodel is None:
        raise ValueError(error_message)
    # Prefer the Azure -> OpenAI translation when one exists.
    if aoaimodel in AOAI_2_OAI:
        return AOAI_2_OAI[aoaimodel]
    # Already a known OpenAI-style name: pass it through as-is.
    if aoaimodel in MODELS_2_TOKEN_LIMITS:
        return aoaimodel
    raise ValueError(error_message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.