@@ -104,7 +104,7 @@ export function htmlToMarkdownSplitChunks(
104104 let lastChunkEndPosition = 0
105105 let lastSplitPosition = 0 // Track where we last split to avoid re-splitting
106106
107- function flushChunk ( endPosition ?: number ) {
107+ function flushChunk ( endPosition ?: number , applyOverlap = false ) {
108108 const currentMd = getCurrentMarkdown ( processor . state )
109109 const chunkEnd = endPosition ?? currentMd . length
110110 const chunkContent = currentMd . slice ( lastChunkEndPosition , chunkEnd )
@@ -149,8 +149,8 @@ export function htmlToMarkdownSplitChunks(
149149 // Track where we split (before applying overlap)
150150 lastSplitPosition = chunkEnd
151151
152- // Handle overlap - ensure we always advance by at least 1 char
153- if ( opts . chunkOverlap > 0 ) {
152+ // Handle overlap - only for size-based splits, not structural splits
153+ if ( applyOverlap && opts . chunkOverlap > 0 ) {
154154 // Cap overlap to (chunkContent.length - 1) to ensure forward progress
155155 const maxOverlap = Math . max ( 0 , chunkContent . length - 1 )
156156 const actualOverlap = Math . min ( opts . chunkOverlap , maxOverlap )
@@ -276,7 +276,7 @@ export function htmlToMarkdownSplitChunks(
276276 splitPosition = currentMd . length
277277 }
278278
279- flushChunk ( splitPosition )
279+ flushChunk ( splitPosition , true )
280280 }
281281 }
282282 } )
0 commit comments