From 5ab080e67833e97e58a8b3d6dc7364aceb7dd5cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Fri, 20 Aug 2021 13:07:57 +0100 Subject: [PATCH] refactor(parser): Correct start & end index positions (#910) Fixes #896 BREAKING: Some indices (primarily end indices) will have changed with this. --- src/Parser.spec.ts | 34 ++++++++++++++++++++++++++++++++-- src/Parser.ts | 41 +++++++++++++++++++---------------------- src/Tokenizer.ts | 7 +++++++ 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/src/Parser.spec.ts b/src/Parser.spec.ts index 8242a6867..02e12b15c 100644 --- a/src/Parser.spec.ts +++ b/src/Parser.spec.ts @@ -82,7 +82,7 @@ describe("API", () => { }); test("should update the position", () => { - const p = new Parser(null); + const p = new Parser(); p.write("foo"); @@ -95,8 +95,38 @@ describe("API", () => { expect(p.endIndex).toBe(7); }); + test("should not have the start index be greater than the end index", () => { + const onopentag = jest.fn(); + const onclosetag = jest.fn(); + + const p = new Parser({ + onopentag(tag) { + expect(p.startIndex).toBeLessThanOrEqual(p.endIndex); + onopentag(tag, p.startIndex, p.endIndex); + }, + onclosetag(tag) { + expect(p.startIndex).toBeLessThanOrEqual(p.endIndex); + onclosetag(tag, p.endIndex); + }, + }); + + p.write("

<p>"); + + expect(onopentag).toHaveBeenLastCalledWith("p", 0, 2); + expect(onclosetag).not.toHaveBeenCalled(); + + p.write("Foo"); + + p.write("<hr>
"); + + expect(onopentag).toHaveBeenLastCalledWith("hr", 6, 9); + expect(onclosetag).toBeCalledTimes(2); + expect(onclosetag).toHaveBeenNthCalledWith(1, "p", 9); + expect(onclosetag).toHaveBeenNthCalledWith(2, "hr", 9); + }); + test("should update the position when a single tag is spread across multiple chunks", () => { - const p = new Parser(null); + const p = new Parser(); p.write("
"); diff --git a/src/Parser.ts b/src/Parser.ts index 06b66d3d1..8ab4d7c48 100644 --- a/src/Parser.ts +++ b/src/Parser.ts @@ -193,7 +193,7 @@ export class Parser { /** The start index of the last event. */ public startIndex = 0; /** The end index of the last event. */ - public endIndex: number | null = null; + public endIndex = 0; private tagname = ""; private attribname = ""; @@ -202,12 +202,14 @@ export class Parser { private stack: string[] = []; private readonly foreignContext: boolean[] = []; private readonly cbs: Partial; - private readonly options: ParserOptions; private readonly lowerCaseTagNames: boolean; private readonly lowerCaseAttributeNames: boolean; private readonly tokenizer: Tokenizer; - constructor(cbs: Partial | null, options: ParserOptions = {}) { + constructor( + cbs?: Partial | null, + private readonly options: ParserOptions = {} + ) { this.options = options; this.cbs = cbs ?? {}; this.lowerCaseTagNames = options.lowerCaseTags ?? !options.xmlMode; @@ -220,31 +222,25 @@ export class Parser { this.cbs.onparserinit?.(this); } - private updatePosition(initialOffset: number) { - if (this.endIndex === null) { - if (this.tokenizer.sectionStart <= initialOffset) { - this.startIndex = 0; - } else { - this.startIndex = this.tokenizer.sectionStart - initialOffset; - } - } else { - this.startIndex = this.endIndex + 1; - } + private updatePosition(offset: number) { + this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset; this.endIndex = this.tokenizer.getAbsoluteIndex(); } // Tokenizer event handlers ontext(data: string): void { - this.updatePosition(1); - (this.endIndex as number)--; + this.startIndex = this.tokenizer.getAbsoluteSectionStart(); + this.endIndex = this.tokenizer.getAbsoluteIndex() - 1; this.cbs.ontext?.(data); } - isVoidElement(name: string): boolean { + protected isVoidElement(name: string): boolean { return !this.options.xmlMode && voidElements.has(name); } onopentagname(name: string): void { + this.updatePosition(1); + if 
(this.lowerCaseTagNames) { name = name.toLowerCase(); } @@ -253,14 +249,12 @@ export class Parser { !this.options.xmlMode && Object.prototype.hasOwnProperty.call(openImpliesClose, name) ) { - let el; while ( this.stack.length > 0 && - openImpliesClose[name].has( - (el = this.stack[this.stack.length - 1]) - ) + openImpliesClose[name].has(this.stack[this.stack.length - 1]) ) { - this.onclosetag(el); + const el = this.stack.pop()!; + this.cbs.onclosetag?.(el); } } if (!this.isVoidElement(name)) { @@ -276,7 +270,8 @@ export class Parser { } onopentagend(): void { - this.updatePosition(1); + this.endIndex = this.tokenizer.getAbsoluteIndex(); + if (this.attribs) { this.cbs.onopentag?.(this.tagname, this.attribs); this.attribs = null; @@ -379,6 +374,7 @@ export class Parser { ondeclaration(value: string): void { if (this.cbs.onprocessinginstruction) { + this.updatePosition(2); const name = this.getInstructionName(value); this.cbs.onprocessinginstruction(`!${name}`, `!${value}`); } @@ -386,6 +382,7 @@ export class Parser { onprocessinginstruction(value: string): void { if (this.cbs.onprocessinginstruction) { + this.updatePosition(2); const name = this.getInstructionName(value); this.cbs.onprocessinginstruction(`?${name}`, `?${value}`); } diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index b59d24099..2ad922bb4 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -339,6 +339,13 @@ export default class Tokenizer { } } + /** + * The start of the current section. + */ + public getAbsoluteSectionStart(): number { + return this.sectionStart + this.bufferOffset; + } + /** * The current index within all of the written data. */