11import type { ElementNode , HandlerContext , MdreamRuntimeState , NodeEvent , TextNode } from './types'
2+
/** Characters after which no separating space is inserted. */
const NO_SPACE_AFTER_CHARS: ReadonlySet<string> = new Set(['\n', ' ', '[', '>', '_', '*', '`', '|', '#', '<', '('])

/** Characters before which no separating space is inserted. */
const NO_SPACE_BEFORE_CHARS: ReadonlySet<string> = new Set([' ', '\n', '\t', '_', '*', '`', '|', '>', '#'])

/**
 * Determines if a space is needed between two adjacent characters.
 *
 * The lookup sets are module-level constants so they are built once rather
 * than on every call.
 *
 * @param lastChar - The character that currently ends the already-emitted output.
 * @param firstChar - The first character of the output about to be emitted.
 * @returns `true` when neither character is in its respective no-space set.
 */
function needsSpacing(lastChar: string, firstChar: string): boolean {
  return !NO_SPACE_AFTER_CHARS.has(lastChar) && !NO_SPACE_BEFORE_CHARS.has(firstChar)
}
12+
/**
 * Determines if a single space should be prepended to a text node's content.
 *
 * Always returns a real boolean. The previous `lastChar && …` form returned
 * the empty string itself when `lastChar` was `''`.
 *
 * @param lastChar - Last character already written to the buffer, or `''` when there is none.
 * @param lastNode - The previously processed node; only `tagHandler.isInline` is read.
 * @param textNode - The text node about to be emitted; only the first character of `value` is read.
 * @returns `true` when a separating space is required before the text.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- parameter type kept as-is for existing callers
function shouldAddSpacingBeforeText(lastChar: string, lastNode: any, textNode: TextNode): boolean {
  // Nothing has been written yet, so there is nothing to separate from.
  if (!lastChar) {
    return false
  }
  // No space is added after a newline, an existing space, '[' or '>'.
  if (lastChar === '\n' || lastChar === ' ' || lastChar === '[' || lastChar === '>') {
    return false
  }
  // No space is added when the previous node's tag handler is flagged inline.
  if (lastNode?.tagHandler?.isInline) {
    return false
  }
  // The text already starts with its own space.
  return textNode.value[0] !== ' '
}
225import { collectNodeContent } from './buffer-region'
326import {
427 DEFAULT_BLOCK_SPACING ,
@@ -41,11 +64,23 @@ export function processHtmlEventToMarkdown(
4164 state : MdreamRuntimeState ,
4265) : void {
4366 const { type : eventType , node } = event
44-
67+ const lastNode = state . lastNode
68+ state . lastNode = event . node
69+ const buff = state . regionContentBuffers . get ( node . regionId || 0 ) || [ ]
70+ const lastBuffEntry = buff [ buff . length - 1 ]
71+ const lastChar = lastBuffEntry ?. charAt ( lastBuffEntry . length - 1 ) || ''
72+ // we need to see if it exists within buff[lastIndex] or buff[lastIndex - 1]
73+ let secondLastChar
74+ if ( lastBuffEntry ?. length > 1 ) {
75+ secondLastChar = lastBuffEntry . charAt ( lastBuffEntry . length - 2 )
76+ }
77+ else {
78+ secondLastChar = buff [ buff . length - 2 ] ?. charAt ( buff [ buff . length - 2 ] . length - 1 )
79+ }
4580 // Handle text nodes
4681 if ( node . type === TEXT_NODE && eventType === NodeEventEnter ) {
4782 const textNode = node as TextNode
48- state . lastNewLines = 0
83+
4984 if ( textNode . value ) {
5085 // Process text node with plugins
5186 if ( state . plugins ?. length ) {
@@ -58,6 +93,16 @@ export function processHtmlEventToMarkdown(
5893 textNode . value = pluginResult . content
5994 }
6095 }
96+ // Skip leading spaces after newlines
97+ if ( textNode . value === ' ' && lastChar === '\n' ) {
98+ return
99+ }
100+
101+ // Add spacing before text if needed
102+ if ( shouldAddSpacingBeforeText ( lastChar , lastNode , textNode ) ) {
103+ textNode . value = ` ${ textNode . value } `
104+ }
105+
61106 collectNodeContent ( textNode , textNode . value , state )
62107 }
63108 state . lastTextNode = textNode
@@ -89,6 +134,14 @@ export function processHtmlEventToMarkdown(
89134 output . push ( ...results )
90135 }
91136
137+ let lastNewLines = 0
138+ if ( lastChar === '\n' ) {
139+ lastNewLines ++
140+ }
141+ if ( secondLastChar === '\n' ) {
142+ lastNewLines ++
143+ }
144+
92145 const eventFn = eventType === NodeEventEnter ? 'enter' : 'exit'
93146 // Use the cached tag handler directly from the node
94147 const handler = node . tagHandler
@@ -99,82 +152,58 @@ export function processHtmlEventToMarkdown(
99152 }
100153 }
101154
102- // Trim trailing whitespace from the last text node
103- if ( ! state . lastNewLines && lastFragment && state . lastTextNode ?. containsWhitespace && ! ! node . parent && 'value' in state . lastTextNode && typeof state . lastTextNode . value === 'string' ) {
104- if ( ! node . parent . depthMap [ TAG_PRE ] || node . parent . tagId === TAG_PRE ) {
105- const originalLength = lastFragment . length
106- const trimmed = lastFragment . trimEnd ( )
107- const trimmedChars = originalLength - trimmed . length
108-
109- // Update the last content in buffer regions with trimmed content
110- if ( trimmedChars > 0 ) {
111- for ( const buffer of Array . from ( state . regionContentBuffers . values ( ) ) ) {
112- if ( buffer . length > 0 && buffer [ buffer . length - 1 ] === lastFragment ) {
113- buffer [ buffer . length - 1 ] = trimmed
114- break
115- }
116- }
117- }
118-
119- state . lastTextNode = undefined
120- }
121- }
122-
123155 // Handle newlines
124156 const newLineConfig = calculateNewLineConfig ( node as ElementNode )
125- let newLines = newLineConfig [ eventType ] || 0
157+ const newLines = Math . max ( 0 , ( newLineConfig [ eventType ] || 0 ) - lastNewLines )
126158
127159 if ( newLines > 0 ) {
128- // Initialize lastNewLines if undefined
129- state . lastNewLines ??= 0
130-
131- // Adjust count based on existing newlines
132- newLines = Math . max ( 0 , newLines - state . lastNewLines )
160+ // if the region has no content, add the current content (without new lines)
161+ if ( ! buff . length ) {
162+ for ( const fragment of output ) {
163+ collectNodeContent ( node , fragment , state )
164+ }
165+ return
166+ }
133167
134- // Handle enter events with content
135- if ( eventType === NodeEventEnter && output . length ) {
136- state . lastNewLines = 0
168+ // Add newlines
169+ const newlinesStr = '\n' . repeat ( newLines )
170+ // trim only whitespace
171+ if ( lastChar === ' ' && buff ?. length ) {
172+ buff [ buff . length - 1 ] = buff [ buff . length - 1 ] . substring ( 0 , buff [ buff . length - 1 ] . length - 1 )
137173 }
138- if ( newLines > 0 ) {
139- if ( ! state . regionContentBuffers . get ( event . node . regionId || 0 ) ?. length ) {
140- for ( const fragment of output ) {
141- collectNodeContent ( node , fragment , state )
142- }
143- return
144- }
145- // Update state for non-enter events
146- if ( eventType !== NodeEventEnter || ! output . length ) {
147- state . lastNewLines = newLines
148- }
149174
150- // Add newlines
151- const newlinesStr = '\n' . repeat ( newLines )
152- // trim only whitespace
153- if ( lastFragment && typeof lastFragment === 'string' && lastFragment . length > 0 ) {
154- const lastChar = lastFragment . charAt ( lastFragment . length - 1 )
155- if ( lastChar === ' ' ) {
156- // Update the last content in buffer regions with trimmed content
157- for ( const buffer of Array . from ( state . regionContentBuffers . values ( ) ) ) {
158- if ( buffer . length > 0 && buffer [ buffer . length - 1 ] === lastFragment ) {
159- buffer [ buffer . length - 1 ] = lastFragment . substring ( 0 , lastFragment . length - 1 )
160- break
161- }
175+ if ( eventType === NodeEventEnter ) {
176+ output . unshift ( newlinesStr )
177+ }
178+ else {
179+ output . push ( newlinesStr )
180+ }
181+ }
182+ else {
183+ // trim whitespace between inline output
184+ // Trim trailing whitespace from the last text node
185+ if ( lastFragment && state . lastTextNode ?. containsWhitespace && ! ! node . parent && 'value' in state . lastTextNode && typeof state . lastTextNode . value === 'string' ) {
186+ if ( ! node . parent . depthMap [ TAG_PRE ] || node . parent . tagId === TAG_PRE ) {
187+ const originalLength = lastFragment . length
188+ const trimmed = lastFragment . trimEnd ( )
189+ const trimmedChars = originalLength - trimmed . length
190+
191+ // Update the last content in buffer regions with trimmed content
192+ if ( trimmedChars > 0 ) {
193+ if ( buff ?. length && buff [ buff . length - 1 ] === lastFragment ) {
194+ buff [ buff . length - 1 ] = trimmed
162195 }
163196 }
164- }
165197
166- if ( eventType === NodeEventEnter ) {
167- output . unshift ( newlinesStr )
168- }
169- else {
170- output . push ( newlinesStr )
198+ state . lastTextNode = undefined
171199 }
172200 }
173201 }
174- else {
175- state . lastNewLines = 0
176- }
177202
203+ // Add spacing between inline elements if needed
204+ if ( output [ 0 ] ?. [ 0 ] && eventType === NodeEventEnter && lastChar && needsSpacing ( lastChar , output [ 0 ] [ 0 ] ) ) {
205+ collectNodeContent ( node , ' ' , state )
206+ }
178207 // Calculate total length of output fragments before adding to the main fragments
179208 for ( const fragment of output ) {
180209 collectNodeContent ( node , fragment , state )
0 commit comments