tokens.go

package openai

import (
	"regexp"
)

// Known context-window sizes per model, taken from the OpenAI model docs:
// https://platform.openai.com/docs/models/gpt-3-5
// https://platform.openai.com/docs/models/gpt-4
var maxTokens = map[string]int{
	"gpt-4":                8192,
	"gpt-4-32k":            32768,
	"gpt-4-1106-preview":   128000,
	"gpt-4-vision-preview": 128000,
	"gpt-3.5-turbo-16k":    16385,
	"gpt-3.5-turbo":        4096,
	"dummy-test":           100, // just for testing
}

// modelDateRe matches a date-pinned model suffix such as "-0613", so a
// versioned name like "gpt-3.5-turbo-0613" can fall back to its base
// model's token limit (see getMaxTokensForModel).
var modelDateRe = regexp.MustCompile(`-\d{4}`)

// truncateMessages truncates the messages so that they fit into the max token
// limit of the model. We always try to keep the last message, so we drop the
// oldest messages first.
func truncateMessages(model string, inputMessages []ChatMessage) ([]ChatMessage, int, int) {
	outputMessages := make([]ChatMessage, 0, len(inputMessages))
	currentTokens := 0
	truncatedMessages := 0
	tokenLimit := getMaxTokensForModel(model)
	// Walk from newest to oldest so the most recent messages claim the token
	// budget first; any message that no longer fits is counted as truncated.
	for i := len(inputMessages) - 1; i >= 0; i-- {
		tokens := estimateTokensForMessage(inputMessages[i].Content)
		if currentTokens+tokens >= tokenLimit {
			truncatedMessages++
			continue
		}
		currentTokens += tokens
		outputMessages = append(outputMessages, inputMessages[i])
	}
	// Restore chronological (oldest-first) order.
	for i, j := 0, len(outputMessages)-1; i < j; i, j = i+1, j-1 {
		outputMessages[i], outputMessages[j] = outputMessages[j], outputMessages[i]
	}
	return outputMessages, currentTokens, truncatedMessages
}
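
// A usage sketch (hypothetical values; ChatMessage is assumed to carry the
// message text in a Content string field, as used above):
//
//	history := []ChatMessage{
//		{Content: "You are a helpful assistant."}, // oldest
//		{Content: longPastedDocument},             // dropped first if over budget
//		{Content: "Summarise the document."},      // newest, kept if at all possible
//	}
//	kept, usedTokens, dropped := truncateMessages("gpt-3.5-turbo", history)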

func getMaxTokensForModel(model string) int {
	if limit, ok := maxTokens[model]; ok {
		return limit
	}
	// Strip a date suffix (e.g. "gpt-4-0613" -> "gpt-4") and retry.
	if modelDateRe.MatchString(model) {
		return getMaxTokensForModel(modelDateRe.ReplaceAllString(model, ""))
	}
	// We need some default; keep it high, as new models will most likely
	// support more tokens.
	return 128000
}
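
// For example (values from the table above; "some-future-model" is a made-up
// name to illustrate the default):
//
//	getMaxTokensForModel("gpt-4")             // 8192: direct hit in the table
//	getMaxTokensForModel("gpt-4-0613")        // 8192: "-0613" stripped, resolves to "gpt-4"
//	getMaxTokensForModel("some-future-model") // 128000: not listed, default applies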

// To keep the dependency footprint small we estimate tokens with the rule of
// thumb from https://platform.openai.com/tokenizer (roughly four characters
// of English text per token), which is accurate enough for truncation.
func estimateTokensForMessage(message string) int {
	return len(message) / 4
}
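
// For example:
//
//	estimateTokensForMessage("Hello, world!") // len 13 / 4 = 3 tokens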