@@ -6,20 +6,22 @@ import {
66 approximateTokenSize ,
77 estimateTokenCount ,
88 isWithinTokenLimit ,
9+ sliceByTokens ,
910} from '../src/index'
1011
// Filesystem path for the 'fixtures' location, resolved relative to this module's URL.
1112const fixturesDir = fileURLToPath ( new URL ( 'fixtures' , import . meta. url ) )
1213
1314describe ( 'token-related functions' , ( ) => {
// Shared sample inputs reused across the token-related test groups so that
// every group is checked against the same English and German text.
15+ const ENGLISH_TEXT = 'Hello, world! This is a short sentence.'
16+ const GERMAN_TEXT = 'Die pünktlich gewünschte Trüffelfüllung im übergestülpten Würzkümmel-Würfel ist kümmerlich und dürfte fürderhin zu Rüffeln in Hülle und Fülle führen'
17+
1418 describe ( 'approximateTokenSize (legacy)' , ( ) => {
// Pins approximateTokenSize for the short English sample to the inline snapshot '11'.
1519 it ( 'should approximate the token size for short English text' , ( ) => {
16- const input = 'Hello, world! This is a short sentence.'
17- expect ( approximateTokenSize ( input ) ) . toMatchInlineSnapshot ( '11' )
20+ expect ( approximateTokenSize ( ENGLISH_TEXT ) ) . toMatchInlineSnapshot ( '11' )
1821 } )
1922
// Pins approximateTokenSize for the umlaut-heavy German sample to the inline snapshot '49'.
2023 it ( 'should approximate the token size for short German text with umlauts' , ( ) => {
21- const input = 'Die pünktlich gewünschte Trüffelfüllung im übergestülpten Würzkümmel-Würfel ist kümmerlich und dürfte fürderhin zu Rüffeln in Hülle und Fülle führen'
22- expect ( approximateTokenSize ( input ) ) . toMatchInlineSnapshot ( '49' )
24+ expect ( approximateTokenSize ( GERMAN_TEXT ) ) . toMatchInlineSnapshot ( '49' )
2325 } )
2426
2527 it ( 'should approximate the token size for English ebook' , async ( ) => {
@@ -40,13 +42,11 @@ describe('token-related functions', () => {
4042
4143 describe ( 'estimateTokenCount' , ( ) => {
// Pins estimateTokenCount for the short English sample to the inline snapshot '11'.
4244 it ( 'should estimate tokens for short English text' , ( ) => {
43- const input = 'Hello, world! This is a short sentence.'
44- expect ( estimateTokenCount ( input ) ) . toMatchInlineSnapshot ( '11' )
45+ expect ( estimateTokenCount ( ENGLISH_TEXT ) ) . toMatchInlineSnapshot ( '11' )
4546 } )
4647
// Pins estimateTokenCount for the umlaut-heavy German sample to the inline snapshot '49'.
4748 it ( 'should estimate tokens for German text with umlauts' , ( ) => {
48- const input = 'Die pünktlich gewünschte Trüffelfüllung im übergestülpten Würzkümmel-Würfel ist kümmerlich und dürfte fürderhin zu Rüffeln in Hülle und Fülle führen'
49- expect ( estimateTokenCount ( input ) ) . toMatchInlineSnapshot ( '49' )
49+ expect ( estimateTokenCount ( GERMAN_TEXT ) ) . toMatchInlineSnapshot ( '49' )
4950 } )
5151
5252 it ( 'should handle empty input' , ( ) => {
@@ -88,4 +88,66 @@ describe('token-related functions', () => {
8888 expect ( isWithinTokenLimit ( input , tokenLimit , customOptions ) ) . toBe ( false )
8989 } )
9090 } )
91+
// Tests for sliceByTokens(text, start?, end?). As pinned by the cases below,
// the bounds are token indices, the end bound is exclusive, and negative
// bounds count back from the end of the text.
describe('sliceByTokens', () => {
  it('should handle empty input and return entire text without bounds', () => {
    // Empty text yields an empty slice, with or without bounds.
    expect(sliceByTokens('')).toBe('')
    expect(sliceByTokens('', 0, 5)).toBe('')

    // Omitting both bounds returns the text unchanged.
    expect(sliceByTokens(ENGLISH_TEXT)).toBe(ENGLISH_TEXT)
  })

  it('should slice English text with positive indices', () => {
    const firstTwoTokens = sliceByTokens(ENGLISH_TEXT, 0, 2)
    const fromThirdToken = sliceByTokens(ENGLISH_TEXT, 2)

    expect(firstTwoTokens).toMatchInlineSnapshot('"Hello,"')
    expect(fromThirdToken).toMatchInlineSnapshot('" world! This is a short sentence."')

    // The two slices share the boundary at token 2, so together they must
    // reproduce the input exactly. This is stricter than a length heuristic:
    // it also catches dropped or duplicated characters at the boundary.
    expect(`${firstTwoTokens}${fromThirdToken}`).toBe(ENGLISH_TEXT)
  })

  it('should slice German text with positive indices', () => {
    // First 3 tokens.
    const firstThree = sliceByTokens(GERMAN_TEXT, 0, 3)
    expect(firstThree).toMatchInlineSnapshot('"Die pünktl"')

    // A middle window must be non-empty and strictly shorter than the input.
    const middle = sliceByTokens(GERMAN_TEXT, 5, 10)
    expect(middle.length).toBeGreaterThan(0)
    expect(middle.length).toBeLessThan(GERMAN_TEXT.length)
  })

  it('should slice German text with negative indices', () => {
    // Last 3 tokens.
    const lastThree = sliceByTokens(GERMAN_TEXT, -3)
    expect(lastThree).toMatchInlineSnapshot('"lle führen"')

    // Everything except the last 2 tokens. Asserting on the string itself
    // (rather than on a boolean) makes a failure print the actual slice.
    const withoutLastTwo = sliceByTokens(GERMAN_TEXT, 0, -2)
    expect(withoutLastTwo).toMatch(/Fülle$/)

    // Both bounds negative.
    const middleNegative = sliceByTokens(GERMAN_TEXT, -8, -3)
    expect(middleNegative.length).toBeGreaterThan(0)
    expect(middleNegative).toContain('Hülle')
  })

  it('should handle edge cases', () => {
    const totalTokens = estimateTokenCount(GERMAN_TEXT)

    // Inverted or zero-width ranges produce an empty slice.
    expect(sliceByTokens(GERMAN_TEXT, 10, 5)).toBe('')
    expect(sliceByTokens(GERMAN_TEXT, 5, 5)).toBe('')

    // Out-of-range bounds are clamped: a start past the end gives an empty
    // slice, while an oversized end or a far-negative start gives the whole text.
    expect(sliceByTokens(GERMAN_TEXT, totalTokens + 10)).toBe('')
    expect(sliceByTokens(GERMAN_TEXT, 0, totalTokens + 10)).toBe(GERMAN_TEXT)
    expect(sliceByTokens(GERMAN_TEXT, -1000)).toBe(GERMAN_TEXT)
  })
})
91153} )
0 commit comments