Skip to content

Commit 34934b0

Browse files
committed
perf: improve heap space usage
1 parent 90f1020 commit 34934b0

File tree

4 files changed

+48
-37
lines changed

4 files changed

+48
-37
lines changed

src/const.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,9 @@ export const TRACK_DEPTH_MAP_KEYS = [
235235
TAG_TH,
236236
]
237237

238+
// Length of the depth-map typed array: highest tag ID (97, TAG_RP) + 1, so that every tag ID is a valid index
239+
export const MAX_TAG_ID = 98
240+
238241
// Pre-defined strings to avoid repeated allocations
239242
export const FRONTMATTER_START = '---'
240243
export const FRONTMATTER_END = '\n---'

src/parser.ts

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
import type { MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, ParentNode } from './types.ts'
1+
import type { DepthMapArray, MdreamProcessingState, MdreamRuntimeState, Node, NodeEvent, ParentNode } from './types.ts'
22
import {
33
ELEMENT_NODE,
4+
MAX_TAG_ID,
45
NodeEventEnter,
56
NodeEventExit,
67
TAG_A,
@@ -39,19 +40,12 @@ const TAB_CHAR = 9 // '\t'
3940
const NEWLINE_CHAR = 10 // '\n'
4041
const CARRIAGE_RETURN_CHAR = 13 // '\r'
4142

42-
// Pre-allocate arrays to reduce allocations
43+
// Pre-allocate arrays and objects to reduce allocations
4344
const EMPTY_ATTRIBUTES: Record<string, string> = Object.freeze({})
4445

45-
// Fast object copy for small objects like depthMap
46+
// Fast typed array copy for depthMap
4647
function copyDepthMap(depthMap: Node['depthMap']): Node['depthMap'] {
47-
const copy: Node['depthMap'] = {}
48-
for (let i = 0; i < TRACK_DEPTH_MAP_KEYS.length; i++) {
49-
const key = TRACK_DEPTH_MAP_KEYS[i]
50-
if (depthMap[key]) {
51-
copy[key] = depthMap[key]
52-
}
53-
}
54-
return copy
48+
return new Uint8Array(depthMap)
5549
}
5650

5751
/**
@@ -74,7 +68,7 @@ export function parseHTML(htmlChunk: string, state: MdreamProcessingState, handl
7468
let textBuffer = '' // Buffer to accumulate text content
7569

7670
// Initialize state
77-
state.depthMap ??= {}
71+
state.depthMap ??= new Uint8Array(MAX_TAG_ID) // Initialize using typed array
7872
state.depth ??= 0
7973
state.lastCharWasWhitespace ??= true // don't allow subsequent whitespace at start
8074
state.justClosedTag ??= false
@@ -448,7 +442,7 @@ function closeNode(node: ParentNode | Node | null, state: MdreamProcessingState,
448442
}
449443

450444
if (node.tagId) {
451-
state.depthMap[node.tagId] = Math.max(0, (state.depthMap[node.tagId] || 0) - 1)
445+
state.depthMap[node.tagId] = Math.max(0, state.depthMap[node.tagId] - 1)
452446
}
453447

454448
if (state.inUnsupportedNodeDepth === state.depth) {
@@ -553,22 +547,13 @@ function processOpeningTag(
553547
closeNode(state.currentElementNode, state, handleEvent)
554548
}
555549

556-
// Fast increment depth tracking
557-
const currentTagCount = state.depthMap[tagId] || 0
558-
state.depthMap[tagId] = currentTagCount + 1
559-
state.depth++
560-
561550
// Get tag handler for this tag
562551
const tagHandler = tagHandlers[tagId]
563552

564553
// Process attributes and tag properties
565554
const result = processTagAttributes(htmlChunk, i, tagHandler)
566555

567556
if (!result.complete) {
568-
// Roll back depth changes
569-
state.depthMap[tagId] = currentTagCount
570-
state.depth--
571-
572557
return {
573558
complete: false,
574559
newPosition: i,
@@ -577,6 +562,11 @@ function processOpeningTag(
577562
}
578563
}
579564

565+
// Fast increment depth tracking with Uint8Array
566+
const currentTagCount = state.depthMap[tagId]
567+
state.depthMap[tagId] = currentTagCount + 1
568+
state.depth++
569+
580570
i = result.newPosition
581571

582572
// Pre-compute flags
@@ -823,6 +813,7 @@ export function processPartialHTMLToMarkdown(
823813
state: Partial<MdreamRuntimeState> = {},
824814
): { chunk: string, remainingHTML: string } {
825815
state.fragmentCount = 0
816+
state.depthMap ??= new Uint8Array(MAX_TAG_ID)
826817

827818
const strategy = state.options?.strategy
828819
const isMinimalFromFirstHeader = strategy === 'minimal-from-first-header'

src/types.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ export interface HTMLToMarkdownOptions {
1717
export const ELEMENT_NODE = 1
1818
export const TEXT_NODE = 3
1919

20+
/**
21+
* Type for the optimized depth map using Uint8Array
22+
* This replaces the Record<number, number> with a fixed-size typed array
23+
* for faster indexed access. Each count occupies one byte, so stored values wrap modulo 256.
24+
*/
25+
export type DepthMapArray = Uint8Array
26+
2027
/**
2128
* Base DOM node interface
2229
*/
@@ -39,8 +46,8 @@ export interface Node {
3946
/** Current nesting depth in the DOM tree */
4047
depth: number
4148

42-
/** Map of tag names to their nesting count */
43-
depthMap: Required<MdreamRuntimeState>['depthMap']
49+
/** Nesting count per tag, indexed by numeric tag ID (Uint8Array for performance) */
50+
depthMap: DepthMapArray
4451

4552
/** Whether this node should be excluded from output */
4653
minimal: boolean
@@ -80,8 +87,8 @@ export interface ParentNode extends Node {
8087
* State interface for HTML parsing and processing
8188
*/
8289
export interface MdreamProcessingState {
83-
/** Map of tag names to their current nesting depth */
84-
depthMap: Record<number, number>
90+
/** Current nesting depth per tag, indexed by numeric tag ID (Uint8Array for performance) */
91+
depthMap: DepthMapArray
8592

8693
/** Current overall nesting depth */
8794
depth: number

test/unit/walk.test.ts

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import type { MdreamProcessingState, NodeEvent } from '../../src/types.js'
22
import { describe, expect, it } from 'vitest'
3-
import { NodeEventEnter } from '../../src/const.js'
3+
import { MAX_TAG_ID, NodeEventEnter } from '../../src/const.js'
44
import { syncHtmlToMarkdown } from '../../src/index.js'
55
import { parseHTML } from '../../src/parser.js'
66

@@ -10,7 +10,7 @@ describe('hTML walking', () => {
1010
const depthLog: { tagName: string, depth: number, event: string }[] = []
1111

1212
const state: Partial<MdreamProcessingState> = {
13-
depthMap: {},
13+
depthMap: new Uint8Array(MAX_TAG_ID),
1414
depth: 0,
1515
currentElementNode: null,
1616
}
@@ -45,7 +45,7 @@ describe('hTML walking', () => {
4545
const depthLog: { tagName: string, depth: number, event: string }[] = []
4646

4747
const state: Partial<MdreamProcessingState> = {
48-
depthMap: {},
48+
depthMap: new Uint8Array(MAX_TAG_ID),
4949
depth: 0,
5050
currentElementNode: null,
5151
}
@@ -79,10 +79,10 @@ describe('hTML walking', () => {
7979

8080
it('tracks depthMap correctly for multiple levels of nested elements', () => {
8181
const html = '<div><ul><li><a href="#">Link <strong>with bold</strong> text</a></li></ul></div>'
82-
const depthMapLog: { tagName: string, depthMap: Record<number, number> }[] = []
82+
const depthMapLog: { tagName: string, depthMap: Uint8Array }[] = []
8383

8484
const state: Partial<MdreamProcessingState> = {
85-
depthMap: {},
85+
depthMap: new Uint8Array(MAX_TAG_ID),
8686
depth: 0,
8787
currentElementNode: null,
8888
}
@@ -93,7 +93,7 @@ describe('hTML walking', () => {
9393
if (event.type === NodeEventEnter && node.type === 1) { // ELEMENT_NODE enter
9494
depthMapLog.push({
9595
tagName: node.name || '',
96-
depthMap: { ...node.depthMap }, // Copy to avoid reference issues in test
96+
depthMap: new Uint8Array(node.depthMap), // Copy to avoid reference issues in test
9797
})
9898
}
9999
}
@@ -109,11 +109,21 @@ describe('hTML walking', () => {
109109
expect(depthMapLog[4].tagName).toBe('strong')
110110

111111
// Each node should have a depthMap that includes itself and its ancestors
112-
expect(depthMapLog[4].depthMap).toMatchObject({
113-
// The exact tag IDs will depend on the TAG_MAP constants
114-
// but the strong element should have entries for div, ul, li, a, and strong
115-
// with counts >= 1
116-
})
112+
// Check that the values in the Uint8Array for the corresponding tag IDs
113+
// are all greater than 0 for the expected elements
114+
const depthMap = depthMapLog[4].depthMap
115+
// Import these constants in a real implementation
116+
const divId = 36 // TAG_DIV
117+
const ulId = 24 // TAG_UL
118+
const liId = 25 // TAG_LI
119+
const aId = 26 // TAG_A
120+
const strongId = 14 // TAG_STRONG
121+
122+
expect(depthMap[divId]).toBeGreaterThan(0)
123+
expect(depthMap[ulId]).toBeGreaterThan(0)
124+
expect(depthMap[liId]).toBeGreaterThan(0)
125+
expect(depthMap[aId]).toBeGreaterThan(0)
126+
expect(depthMap[strongId]).toBeGreaterThan(0)
117127
})
118128

119129
it('handles complex nested elements with text nodes', () => {

0 commit comments

Comments
 (0)