From c1a29d76d51c60cd8e7fa16b5ff3d2bac84017f0 Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Tue, 23 Jan 2024 03:00:04 -0800
Subject: [PATCH 1/2] Roll 'tiktoken' (fka. @dqbd/tiktoken)

---
 package-lock.json                | 12 ++++++------
 package.json                     |  2 +-
 src/common/util/token-counter.ts |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index b6b3b2648..1cb96184b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,7 +9,6 @@
         "version": "1.11.0",
         "hasInstallScript": true,
         "dependencies": {
-        "@dqbd/tiktoken": "^1.0.7",
         "@emotion/cache": "^11.11.0",
         "@emotion/react": "^11.11.3",
         "@emotion/server": "^11.11.0",
@@ -45,6 +44,7 @@
         "remark-gfm": "^4.0.0",
         "superjson": "^2.2.1",
         "tesseract.js": "^5.0.4",
+        "tiktoken": "^1.0.11",
         "uuid": "^9.0.1",
         "zod": "^3.22.4",
         "zustand": "^4.4.7"
@@ -301,11 +301,6 @@
         "node": ">=14.1.0"
       }
     },
-    "node_modules/@dqbd/tiktoken": {
-      "version": "1.0.7",
-      "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.7.tgz",
-      "integrity": "sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw=="
-    },
     "node_modules/@emotion/babel-plugin": {
       "version": "11.11.0",
       "resolved": "https://registry.npmjs.org/@emotion/babel-plugin/-/babel-plugin-11.11.0.tgz",
@@ -6783,6 +6778,11 @@
         "xtend": "~2.1.1"
       }
     },
+    "node_modules/tiktoken": {
+      "version": "1.0.11",
+      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.11.tgz",
+      "integrity": "sha512-aMJcn9NGmb6zDXkCweJLnACyIyjdiYIk1odAfnCUvin7O1QsV1rQP1hatGDMhQovxkeSJhFeU7QuGkbDHGciDQ=="
+    },
     "node_modules/tiny-invariant": {
       "version": "1.3.1",
       "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.1.tgz",
diff --git a/package.json b/package.json
index b02c30430..5d6980601 100644
--- a/package.json
+++ b/package.json
@@ -13,7 +13,6 @@
     "db:studio": "prisma studio"
   },
   "dependencies": {
-    "@dqbd/tiktoken": "^1.0.7",
    "@emotion/cache": "^11.11.0",
     "@emotion/react": "^11.11.3",
     "@emotion/server": "^11.11.0",
@@ -49,6 +48,7 @@
     "remark-gfm": "^4.0.0",
     "superjson": "^2.2.1",
     "tesseract.js": "^5.0.4",
+    "tiktoken": "^1.0.11",
     "uuid": "^9.0.1",
     "zod": "^3.22.4",
     "zustand": "^4.4.7"
diff --git a/src/common/util/token-counter.ts b/src/common/util/token-counter.ts
index b89752dc7..ca2dc599a 100644
--- a/src/common/util/token-counter.ts
+++ b/src/common/util/token-counter.ts
@@ -1,4 +1,4 @@
-import { encoding_for_model, get_encoding, Tiktoken, TiktokenModel } from '@dqbd/tiktoken';
+import { encoding_for_model, get_encoding, Tiktoken, TiktokenModel } from 'tiktoken';
 
 import { DLLMId, findLLMOrThrow, useModelsStore } from '~/modules/llms/store-llms';
 

From b14cd47a7b3db066f96ab95eb1a9e049ad27e58c Mon Sep 17 00:00:00 2001
From: Enrico Ros
Date: Tue, 23 Jan 2024 03:00:18 -0800
Subject: [PATCH 2/2] Tiktoken [x4, port]: successfully defer the library load, with large interactivity improvements

The rationale is that TTFP (time to first paint) is a more important
metric than awaiting the 1.2MB dependency at every page load.

(cherry picked from commit 3a8195a02b8ae444a52ad1e16e05760c6539e41a)
(cherry picked from commit 808077bc2bce656258235baa0e919d96f0cf79e6)
(cherry picked from commit 76f6c7917c0941146a37491995d8a3c8bb6575d8)
(cherry picked from commit fc1fc918459f84a72f2cfcee0e88687d6c0daafb)
---
 .../chat/components/composer/Composer.tsx     |  2 +-
 .../components/composer/attachments/port.ts   |  2 +-
 .../composer/attachments/useLLMAttachments.ts |  2 +-
 .../providers/ProviderBackendAndNoSSR.tsx     | 19 ++++--
 src/common/state/store-chats.ts               |  4 +-
 src/common/util/token-counter.ts              | 59 ++++++++++++++++---
 src/modules/aifn/summarize/ContentReducer.tsx |  2 +-
 src/modules/backend/state-backend.ts          |  2 +-
 8 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/src/apps/chat/components/composer/Composer.tsx b/src/apps/chat/components/composer/Composer.tsx
index 41640f074..91eadb278 100644
--- a/src/apps/chat/components/composer/Composer.tsx
+++ b/src/apps/chat/components/composer/Composer.tsx
@@ -130,7 +130,7 @@ export function Composer(props: {
 
   const tokensComposerText = React.useMemo(() => {
     if (!debouncedText || !chatLLMId) return 0;
-    return countModelTokens(debouncedText, chatLLMId, 'composer text');
+    return countModelTokens(debouncedText, chatLLMId, 'composer text') ?? 0;
   }, [chatLLMId, debouncedText]);
   let tokensComposer = tokensComposerText + llmAttachments.tokenCountApprox;
   if (tokensComposer > 0)
diff --git a/src/apps/chat/components/composer/attachments/port.ts b/src/apps/chat/components/composer/attachments/port.ts
index 84f0607ce..945ea6e80 100644
--- a/src/apps/chat/components/composer/attachments/port.ts
+++ b/src/apps/chat/components/composer/attachments/port.ts
@@ -24,7 +24,7 @@ import { ContentReducer } from '~/modules/aifn/summarize/ContentReducer';
 
     // see how we fare on budget
     if (chatLLMId) {
-      const newTextTokens = countModelTokens(newText, chatLLMId, 'reducer trigger');
+      const newTextTokens = countModelTokens(newText, chatLLMId, 'reducer trigger') ?? 0;
 
       // simple trigger for the reduction dialog
       if (newTextTokens > remainingTokens) {
diff --git a/src/apps/chat/components/composer/attachments/useLLMAttachments.ts b/src/apps/chat/components/composer/attachments/useLLMAttachments.ts
index c095e03f4..92bc2e300 100644
--- a/src/apps/chat/components/composer/attachments/useLLMAttachments.ts
+++ b/src/apps/chat/components/composer/attachments/useLLMAttachments.ts
@@ -78,7 +78,7 @@ function toLLMAttachment(attachment: Attachment, supportedOutputPartTypes: Compo
   const tokenCountApprox = llmForTokenCount
     ? attachmentOutputs.reduce((acc, output) => {
       if (output.type === 'text-block')
-        return acc + countModelTokens(output.text, llmForTokenCount, 'attachments tokens count');
+        return acc + (countModelTokens(output.text, llmForTokenCount, 'attachments tokens count') ?? 0);
       console.warn('Unhandled token preview for output type:', output.type);
       return acc;
     }, 0)
diff --git a/src/common/providers/ProviderBackendAndNoSSR.tsx b/src/common/providers/ProviderBackendAndNoSSR.tsx
index 2d4972d99..30cfe7f28 100644
--- a/src/common/providers/ProviderBackendAndNoSSR.tsx
+++ b/src/common/providers/ProviderBackendAndNoSSR.tsx
@@ -1,8 +1,10 @@
 import * as React from 'react';
 
-import { useBackendCapsLoader } from '~/modules/backend/state-backend';
+import { useBackendCapsKnowledge } from '~/modules/backend/state-backend';
 
 import { apiQuery } from '~/common/util/trpc.client';
 
+import { preloadTiktokenLibrary } from '~/common/util/token-counter';
+
 /**
  * Note: we used to have a NoSSR wrapper inside the AppLayout component (which was delaying rendering 1 cycle),
@@ -11,20 +13,27 @@ export function ProviderBackendAndNoSSR(props: { children: React.ReactNode }) {
 
   // external state
-  const [loaded, setCapabilties] = useBackendCapsLoader();
-
+  const [haveCapabilities, setCapabilties] = useBackendCapsKnowledge();
   // load from the backend
   const { data: capabilities } = apiQuery.backend.listCapabilities.useQuery(undefined, {
     staleTime: 1000 * 60 * 60 * 24, // 1 day
   });
 
-  // update the state
+
+  // [effect] copy from the backend (capabilities) to the state (setCapabilties)
   React.useEffect(() => {
     if (capabilities)
       setCapabilties(capabilities);
   }, [capabilities, setCapabilties]);
 
+
+  // [effect] in parallel preload the Tiktoken library - large WASM payload, so fire/forget
+  React.useEffect(() => {
+    void preloadTiktokenLibrary();
+  }, []);
+
+
+  // block rendering until the capabilities are loaded
-  return !loaded ? null : props.children;
+  return !haveCapabilities ? null : props.children;
 }
\ No newline at end of file
diff --git a/src/common/state/store-chats.ts b/src/common/state/store-chats.ts
index 1ebc1e867..552576c0e 100644
--- a/src/common/state/store-chats.ts
+++ b/src/common/state/store-chats.ts
@@ -362,7 +362,7 @@ export const useChatStore = create()(devtools(
             ...updatedMessage,
             ...(setUpdated && { updated: Date.now() }),
             ...(((updatedMessage.typing === false || !message.typing) && chatLLMId && {
-              tokenCount: countModelTokens(updatedMessage.text || message.text, chatLLMId, 'editMessage(typing=false)'),
+              tokenCount: countModelTokens(updatedMessage.text || message.text, chatLLMId, 'editMessage(typing=false)') ?? 0,
             })),
           } : message);
@@ -534,7 +534,7 @@ function _migrateLocalStorageData(): ChatState | {} {
  */
 function updateDMessageTokenCount(message: DMessage, llmId: DLLMId | null, forceUpdate: boolean, debugFrom: string): number {
   if (forceUpdate || !message.tokenCount)
-    message.tokenCount = llmId ? countModelTokens(message.text, llmId, debugFrom) : 0;
+    message.tokenCount = llmId ? countModelTokens(message.text, llmId, debugFrom) ?? 0 : 0;
   return message.tokenCount;
 }
diff --git a/src/common/util/token-counter.ts b/src/common/util/token-counter.ts
index ca2dc599a..adfc06b78 100644
--- a/src/common/util/token-counter.ts
+++ b/src/common/util/token-counter.ts
@@ -1,30 +1,70 @@
-import { encoding_for_model, get_encoding, Tiktoken, TiktokenModel } from 'tiktoken';
+import type { Tiktoken, TiktokenEncoding, TiktokenModel } from 'tiktoken';
 
-import { DLLMId, findLLMOrThrow, useModelsStore } from '~/modules/llms/store-llms';
+import { DLLMId, findLLMOrThrow } from '~/modules/llms/store-llms';
 
 
 // Do not set this to true in production, it's very verbose
 const DEBUG_TOKEN_COUNT = false;
 
+// global symbols to dynamically load the Tiktoken library
+let get_encoding: ((encoding: TiktokenEncoding) => Tiktoken) | null = null;
+let encoding_for_model: ((model: TiktokenModel) => Tiktoken) | null = null;
+let preloadPromise: Promise<void> | null = null;
+let informTheUser = false;
+
+export function preloadTiktokenLibrary() {
+  if (!preloadPromise) {
+    preloadPromise = import('tiktoken')
+      .then(tiktoken => {
+        get_encoding = tiktoken.get_encoding;
+        encoding_for_model = tiktoken.encoding_for_model;
+        if (informTheUser)
+          console.warn('countModelTokens: Library loaded successfully');
+      })
+      .catch(error => {
+        console.error('countModelTokens: Failed to load Tiktoken library:', error);
+        preloadPromise = null; // Allow retrying if the import fails
+        throw error; // Re-throw the error to inform the caller
+      });
+  }
+  return preloadPromise;
+}
+
+
 /**
  * Wrapper around the Tiktoken library, to keep tokenizers for all models in a cache
 *
 * We also preload the tokenizer for the default model, so that the first time a user types
 * a message, it doesn't stall loading the tokenizer.
 */
-export const countModelTokens: (text: string, llmId: DLLMId, debugFrom: string) => number = (() => {
+export const countModelTokens: (text: string, llmId: DLLMId, debugFrom: string) => number | null = (() => {
 
   // return () => 0;
 
   const tokenEncoders: { [modelId: string]: Tiktoken } = {};
+  let encodingCL100K: Tiktoken | null = null;
+
+  function _tokenCount(text: string, llmId: DLLMId, debugFrom: string): number | null {
+
+    // The library shall have been preloaded - if not, attempt to start its loading and return null to indicate we're not ready to count
+    if (!encoding_for_model || !get_encoding) {
+      if (!informTheUser) {
+        console.warn('countModelTokens: Tiktoken library is not yet loaded, loading now...');
+        informTheUser = true;
+      }
+      void preloadTiktokenLibrary(); // Attempt to preload without waiting.
+      return null;
+    }
 
-  function tokenCount(text: string, llmId: DLLMId, debugFrom: string): number {
     const { options: { llmRef: openaiModel } } = findLLMOrThrow(llmId);
     if (!openaiModel) throw new Error(`LLM ${llmId} has no LLM reference id`);
     if (!(openaiModel in tokenEncoders)) {
       try {
         tokenEncoders[openaiModel] = encoding_for_model(openaiModel as TiktokenModel);
       } catch (e) {
-        tokenEncoders[openaiModel] = get_encoding('cl100k_base');
+        // make sure we recycle the default encoding across all models
+        if (!encodingCL100K)
+          encodingCL100K = get_encoding('cl100k_base');
+        tokenEncoders[openaiModel] = encodingCL100K;
       }
     }
     let count: number = 0;
@@ -40,10 +80,11 @@ export const countModelTokens: (text: string, llmId: DLLMId, debugFrom: string)
     return count;
   }
 
+  // NOTE: disabled on 2024-01-23, as the first load is more important than instant reactivity
   // preload the tokenizer for the default model
-  const { chatLLMId } = useModelsStore.getState();
-  if (chatLLMId)
-    tokenCount('', chatLLMId, 'warmup');
+  // const { chatLLMId } = useModelsStore.getState();
+  // if (chatLLMId)
+  //   _tokenCount('', chatLLMId, 'warmup');
 
-  return tokenCount;
+  return _tokenCount;
 })();
\ No newline at end of file
diff --git a/src/modules/aifn/summarize/ContentReducer.tsx b/src/modules/aifn/summarize/ContentReducer.tsx
index b589e2e69..a2e6a8702 100644
--- a/src/modules/aifn/summarize/ContentReducer.tsx
+++ b/src/modules/aifn/summarize/ContentReducer.tsx
@@ -48,7 +48,7 @@ export function ContentReducer(props: {
   const [processing, setProcessing] = React.useState(false);
 
   // derived state
-  const reducedTokens = reducerModelId ? countModelTokens(reducedText, reducerModelId, 'content reducer reduce') : 0;
+  const reducedTokens = reducerModelId ? countModelTokens(reducedText, reducerModelId, 'content reducer reduce') ?? 0 : 0;
   const remainingTokens = props.tokenLimit - reducedTokens;
 
diff --git a/src/modules/backend/state-backend.ts b/src/modules/backend/state-backend.ts
index 2bbcdd945..7a28a543e 100644
--- a/src/modules/backend/state-backend.ts
+++ b/src/modules/backend/state-backend.ts
@@ -52,7 +52,7 @@ const useBackendStore = create()(
 );
 
 
-export function useBackendCapsLoader(): [boolean, (capabilities: Partial) => void] {
+export function useBackendCapsKnowledge(): [boolean, (capabilities: Partial) => void] {
   return useBackendStore(state => [state.loadedCapabilities, state.setCapabilities], shallow);
 }