  * Tests for tokenizer cache behavior
  */

-import { describe, it, expect, beforeEach } from "@jest/globals";
+import { describe, it, expect } from "@jest/globals";
 import { getTokenizerForModel } from "./tokenizer";

 describe("tokenizer cache", () => {
   const testText = "Hello, world!";
-
+
   it("should use different cache keys for different models", () => {
     // Get tokenizers for different models
     const gpt4Tokenizer = getTokenizerForModel("openai:gpt-4");
     const claudeTokenizer = getTokenizerForModel("anthropic:claude-opus-4");
-
+
     // Count tokens with first model
     const gpt4Count = gpt4Tokenizer.countTokens(testText);
-
+
     // Count tokens with second model
     const claudeCount = claudeTokenizer.countTokens(testText);
-
+
     // Counts may differ because different encodings
     // This test mainly ensures no crash and cache isolation
     expect(typeof gpt4Count).toBe("number");
     expect(typeof claudeCount).toBe("number");
     expect(gpt4Count).toBeGreaterThan(0);
     expect(claudeCount).toBeGreaterThan(0);
   });
-
+
   it("should return same count for same (model, text) pair from cache", () => {
     const tokenizer = getTokenizerForModel("openai:gpt-4");
-
+
     // First call
     const count1 = tokenizer.countTokens(testText);
-
+
     // Second call should hit cache
     const count2 = tokenizer.countTokens(testText);
-
+
     expect(count1).toBe(count2);
   });
-
+
   it("should normalize model keys for cache consistency", () => {
     // These should map to the same cache key
     const tokenizer1 = getTokenizerForModel("anthropic:claude-opus-4");
     const tokenizer2 = getTokenizerForModel("anthropic/claude-opus-4");
-
+
     const count1 = tokenizer1.countTokens(testText);
     const count2 = tokenizer2.countTokens(testText);
-
+
     // Should get same count since they normalize to same model
     expect(count1).toBe(count2);
   });
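The module under test, tokenizer.ts, is not part of this diff, so the following is only a minimal sketch of the caching behavior the tests assume. The Tokenizer interface shape, the normalizeModelKey helper, and the length-based token heuristic are hypothetical stand-ins inferred from the test expectations, not the real implementation.

// Hypothetical sketch of tokenizer.ts -- assumptions, not the actual module.
// Assumes model strings look like "provider:model" or "provider/model" and
// that both forms should share a single cache entry.

interface Tokenizer {
  countTokens(text: string): number;
}

// One cached tokenizer per normalized model key.
const tokenizerCache = new Map<string, Tokenizer>();

// Normalize "provider/model" and "provider:model" to one cache key.
function normalizeModelKey(model: string): string {
  return model.trim().toLowerCase().replace(/\//g, ":");
}

export function getTokenizerForModel(model: string): Tokenizer {
  const key = normalizeModelKey(model);
  let tokenizer = tokenizerCache.get(key);
  if (!tokenizer) {
    const counts = new Map<string, number>(); // memoize per (model, text)
    tokenizer = {
      countTokens(text: string): number {
        const cached = counts.get(text);
        if (cached !== undefined) return cached;
        // Placeholder heuristic; a real tokenizer would use the
        // model-specific encoding instead of character length.
        const count = Math.max(1, Math.ceil(text.length / 4));
        counts.set(text, count);
        return count;
      },
    };
    tokenizerCache.set(key, tokenizer);
  }
  return tokenizer;
}

With the cache keyed on the normalized string, "anthropic:claude-opus-4" and "anthropic/claude-opus-4" resolve to the same entry and the same memoized counts, which is what the normalization test asserts.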