22 * Token calculation utilities for chat statistics
33 */
44
5- import AITokenizer , { type Encoding , models } from "ai-tokenizer" ;
6- import * as o200k_base from "ai-tokenizer/encoding/o200k_base" ;
7- import * as claude from "ai-tokenizer/encoding/claude" ;
85import { LRUCache } from "lru-cache" ;
96import CRC32 from "crc-32" ;
107import { getToolSchemas , getAvailableTools } from "@/utils/tools/toolDefinitions" ;
@@ -14,6 +11,58 @@ export interface Tokenizer {
1411 countTokens : ( text : string ) => number ;
1512}
1613
14+ /**
15+ * Lazy-loaded tokenizer modules to reduce startup time
16+ * These are loaded on first use with /4 approximation fallback
17+ *
18+ * eslint-disable-next-line @typescript-eslint/consistent-type-imports -- Dynamic imports are intentional for lazy loading
19+ */
20+ let tokenizerModules : {
21+ // eslint-disable-next-line @typescript-eslint/consistent-type-imports
22+ AITokenizer : typeof import ( "ai-tokenizer" ) . default ;
23+ // eslint-disable-next-line @typescript-eslint/consistent-type-imports
24+ models : typeof import ( "ai-tokenizer" ) . models ;
25+ // eslint-disable-next-line @typescript-eslint/consistent-type-imports
26+ o200k_base : typeof import ( "ai-tokenizer/encoding/o200k_base" ) ;
27+ // eslint-disable-next-line @typescript-eslint/consistent-type-imports
28+ claude : typeof import ( "ai-tokenizer/encoding/claude" ) ;
29+ } | null = null ;
30+
31+ let tokenizerLoadPromise : Promise < void > | null = null ;
32+
33+ /**
34+ * Load tokenizer modules asynchronously
35+ * Dynamic imports are intentional here to defer loading heavy tokenizer modules
36+ * until first use, reducing app startup time from ~8.8s to <1s
37+ *
38+ * @returns Promise that resolves when tokenizer modules are loaded
39+ */
40+ export async function loadTokenizerModules ( ) : Promise < void > {
41+ if ( tokenizerModules ) return ;
42+ if ( tokenizerLoadPromise ) return tokenizerLoadPromise ;
43+
44+ tokenizerLoadPromise = ( async ( ) => {
45+ // Performance: lazy load tokenizer modules to reduce startup time from ~8.8s to <1s
46+ /* eslint-disable no-restricted-syntax */
47+ const [ AITokenizerModule , modelsModule , o200k_base , claude ] = await Promise . all ( [
48+ import ( "ai-tokenizer" ) ,
49+ import ( "ai-tokenizer" ) ,
50+ import ( "ai-tokenizer/encoding/o200k_base" ) ,
51+ import ( "ai-tokenizer/encoding/claude" ) ,
52+ ] ) ;
53+ /* eslint-enable no-restricted-syntax */
54+
55+ tokenizerModules = {
56+ AITokenizer : AITokenizerModule . default ,
57+ models : modelsModule . models ,
58+ o200k_base,
59+ claude,
60+ } ;
61+ } ) ( ) ;
62+
63+ return tokenizerLoadPromise ;
64+ }
65+
1766/**
1867 * LRU cache for token counts by text checksum
1968 * Avoids re-tokenizing identical strings (system messages, tool definitions, etc.)
@@ -57,54 +106,81 @@ function countTokensCached(text: string, tokenizeFn: () => number | Promise<numb
57106}
58107
59108/**
60- * Get the appropriate tokenizer for a given model string
61- *
62- * @param modelString - Model identifier (e.g., "anthropic:claude-opus-4-1", "openai:gpt-4")
63- * @returns Tokenizer interface with name and countTokens function
109+ * Count tokens using loaded tokenizer modules
110+ * Assumes tokenizerModules is not null
64111 */
65- export function getTokenizerForModel ( modelString : string ) : Tokenizer {
112+ function countTokensWithLoadedModules (
113+ text : string ,
114+ modelString : string ,
115+ modules : NonNullable < typeof tokenizerModules >
116+ ) : number {
66117 const [ provider , modelId ] = modelString . split ( ":" ) ;
67- let model = models [ `${ provider } /${ modelId } ` as keyof typeof models ] ;
68- let hasExactTokenizer = true ;
118+ let model = modules . models [ `${ provider } /${ modelId } ` as keyof typeof modules . models ] ;
69119 if ( ! model ) {
70120 switch ( modelString ) {
71121 case "anthropic:claude-sonnet-4-5" :
72- model = models [ "anthropic/claude-sonnet-4.5" ] ;
122+ model = modules . models [ "anthropic/claude-sonnet-4.5" ] ;
73123 break ;
74124 default :
75125 // GPT-4o has pretty good approximation for most models.
76- model = models [ "openai/gpt-4o" ] ;
77- hasExactTokenizer = false ;
126+ model = modules . models [ "openai/gpt-4o" ] ;
78127 }
79128 }
80129
81- let encoding : Encoding ;
130+ let encoding : typeof modules . o200k_base | typeof modules . claude ;
82131 switch ( model . encoding ) {
83132 case "o200k_base" :
84- encoding = o200k_base ;
133+ encoding = modules . o200k_base ;
85134 break ;
86135 case "claude" :
87- encoding = claude ;
136+ encoding = modules . claude ;
88137 break ;
89138 default :
90139 // Do not include all encodings, as they are pretty big.
91140 // The most common one is o200k_base.
92- encoding = o200k_base ;
141+ encoding = modules . o200k_base ;
93142 break ;
94143 }
95- const tokenizer = new AITokenizer ( encoding ) ;
144+ const tokenizer = new modules . AITokenizer ( encoding ) ;
145+ return tokenizer . count ( text ) ;
146+ }
147+
148+ /**
149+ * Get the appropriate tokenizer for a given model string
150+ *
151+ * @param modelString - Model identifier (e.g., "anthropic:claude-opus-4-1", "openai:gpt-4")
152+ * @returns Tokenizer interface with name and countTokens function
153+ */
154+ export function getTokenizerForModel ( modelString : string ) : Tokenizer {
155+ // Start loading tokenizer modules in background (idempotent)
156+ void loadTokenizerModules ( ) ;
96157
97158 return {
98159 get name ( ) {
99- return hasExactTokenizer ? model . encoding : "approximation" ;
160+ return tokenizerModules ? "loaded" : "approximation" ;
100161 } ,
101162 countTokens : ( text : string ) => {
102- return countTokensCached ( text , ( ) => {
163+ // If tokenizer already loaded, use synchronous path for accurate counts
164+ if ( tokenizerModules ) {
165+ return countTokensCached ( text , ( ) => {
166+ try {
167+ return countTokensWithLoadedModules ( text , modelString , tokenizerModules ! ) ;
168+ } catch ( error ) {
169+ // Unexpected error during tokenization, fallback to approximation
170+ console . error ( "Failed to tokenize, falling back to approximation:" , error ) ;
171+ return Math . ceil ( text . length / 4 ) ;
172+ }
173+ } ) ;
174+ }
175+
176+ // Tokenizer not yet loaded - use async path (returns approximation immediately)
177+ return countTokensCached ( text , async ( ) => {
178+ await loadTokenizerModules ( ) ;
103179 try {
104- return tokenizer . count ( text ) ;
180+ return countTokensWithLoadedModules ( text , modelString , tokenizerModules ! ) ;
105181 } catch ( error ) {
106182 // Unexpected error during tokenization, fallback to approximation
107- console . error ( "Failed to tokenize with tiktoken , falling back to approximation:" , error ) ;
183+ console . error ( "Failed to tokenize, falling back to approximation:" , error ) ;
108184 return Math . ceil ( text . length / 4 ) ;
109185 }
110186 } ) ;
0 commit comments