Skip to content

Commit

Permalink
refactor(parser): Correct start & end index positions (#910)
Browse files Browse the repository at this point in the history
Fixes #896

BREAKING: Some reported indices (primarily end indices) differ from the values produced before this change.
  • Loading branch information
fb55 committed Aug 20, 2021
1 parent ef8f2bd commit 5ab080e
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 24 deletions.
34 changes: 32 additions & 2 deletions src/Parser.spec.ts
Expand Up @@ -82,7 +82,7 @@ describe("API", () => {
});

test("should update the position", () => {
const p = new Parser(null);
const p = new Parser();

p.write("foo");

Expand All @@ -95,8 +95,38 @@ describe("API", () => {
expect(p.endIndex).toBe(7);
});

test("should not have the start index be greater than the end index", () => {
    // Spies that record what each handler saw. They are named differently
    // from the handler methods below so the calls are not mistaken for recursion.
    const openTagSpy = jest.fn();
    const closeTagSpy = jest.fn();

    // Invariant checked inside every handler invocation.
    const expectOrderedIndices = (): void => {
        expect(parser.startIndex).toBeLessThanOrEqual(parser.endIndex);
    };

    const parser = new Parser({
        onopentag(name) {
            expectOrderedIndices();
            openTagSpy(name, parser.startIndex, parser.endIndex);
        },
        onclosetag(name) {
            expectOrderedIndices();
            closeTagSpy(name, parser.endIndex);
        },
    });

    // First chunk: a single open tag occupying indices 0 through 2.
    parser.write("<p>");

    expect(openTagSpy).toHaveBeenLastCalledWith("p", 0, 2);
    expect(closeTagSpy).not.toHaveBeenCalled();

    // Text followed by a second tag, written as separate chunks.
    parser.write("Foo");

    parser.write("<hr>");

    // "<hr>" is reported at 6..9; both close events are expected to carry
    // the end index of that same tag.
    expect(openTagSpy).toHaveBeenLastCalledWith("hr", 6, 9);
    expect(closeTagSpy).toHaveBeenCalledTimes(2);
    expect(closeTagSpy).toHaveBeenNthCalledWith(1, "p", 9);
    expect(closeTagSpy).toHaveBeenNthCalledWith(2, "hr", 9);
});

test("should update the position when a single tag is spread across multiple chunks", () => {
const p = new Parser(null);
const p = new Parser();

p.write("<div ");
p.write("foo=bar>");
Expand Down
41 changes: 19 additions & 22 deletions src/Parser.ts
Expand Up @@ -193,7 +193,7 @@ export class Parser {
/** The start index of the last event. */
public startIndex = 0;
/** The end index of the last event. */
public endIndex: number | null = null;
public endIndex = 0;

private tagname = "";
private attribname = "";
Expand All @@ -202,12 +202,14 @@ export class Parser {
private stack: string[] = [];
private readonly foreignContext: boolean[] = [];
private readonly cbs: Partial<Handler>;
private readonly options: ParserOptions;
private readonly lowerCaseTagNames: boolean;
private readonly lowerCaseAttributeNames: boolean;
private readonly tokenizer: Tokenizer;

constructor(cbs: Partial<Handler> | null, options: ParserOptions = {}) {
constructor(
cbs?: Partial<Handler> | null,
private readonly options: ParserOptions = {}
) {
this.options = options;
this.cbs = cbs ?? {};
this.lowerCaseTagNames = options.lowerCaseTags ?? !options.xmlMode;
Expand All @@ -220,31 +222,25 @@ export class Parser {
this.cbs.onparserinit?.(this);
}

private updatePosition(initialOffset: number) {
if (this.endIndex === null) {
if (this.tokenizer.sectionStart <= initialOffset) {
this.startIndex = 0;
} else {
this.startIndex = this.tokenizer.sectionStart - initialOffset;
}
} else {
this.startIndex = this.endIndex + 1;
}
private updatePosition(offset: number) {
this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset;
this.endIndex = this.tokenizer.getAbsoluteIndex();
}

// Tokenizer event handlers
ontext(data: string): void {
this.updatePosition(1);
(this.endIndex as number)--;
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
this.endIndex = this.tokenizer.getAbsoluteIndex() - 1;
this.cbs.ontext?.(data);
}

isVoidElement(name: string): boolean {
protected isVoidElement(name: string): boolean {
return !this.options.xmlMode && voidElements.has(name);
}

onopentagname(name: string): void {
this.updatePosition(1);

if (this.lowerCaseTagNames) {
name = name.toLowerCase();
}
Expand All @@ -253,14 +249,12 @@ export class Parser {
!this.options.xmlMode &&
Object.prototype.hasOwnProperty.call(openImpliesClose, name)
) {
let el;
while (
this.stack.length > 0 &&
openImpliesClose[name].has(
(el = this.stack[this.stack.length - 1])
)
openImpliesClose[name].has(this.stack[this.stack.length - 1])
) {
this.onclosetag(el);
const el = this.stack.pop()!;
this.cbs.onclosetag?.(el);
}
}
if (!this.isVoidElement(name)) {
Expand All @@ -276,7 +270,8 @@ export class Parser {
}

onopentagend(): void {
this.updatePosition(1);
this.endIndex = this.tokenizer.getAbsoluteIndex();

if (this.attribs) {
this.cbs.onopentag?.(this.tagname, this.attribs);
this.attribs = null;
Expand Down Expand Up @@ -379,13 +374,15 @@ export class Parser {

/**
 * Tokenizer handler for declarations.
 *
 * Does nothing unless an `onprocessinginstruction` callback is registered.
 * Otherwise it refreshes the start/end indices, then forwards the
 * declaration to that callback with both the instruction name and the
 * raw value prefixed by `!`.
 *
 * @param value The declaration text as delivered by the tokenizer.
 */
ondeclaration(value: string): void {
    if (!this.cbs.onprocessinginstruction) {
        return;
    }

    // NOTE(review): the offset of 2 presumably accounts for the leading "<!" — confirm against the tokenizer.
    this.updatePosition(2);

    const instructionName = this.getInstructionName(value);
    // Invoked through `this.cbs` so the callback keeps its original `this`.
    this.cbs.onprocessinginstruction(`!${instructionName}`, `!${value}`);
}

onprocessinginstruction(value: string): void {
if (this.cbs.onprocessinginstruction) {
this.updatePosition(2);
const name = this.getInstructionName(value);
this.cbs.onprocessinginstruction(`?${name}`, `?${value}`);
}
Expand Down
7 changes: 7 additions & 0 deletions src/Tokenizer.ts
Expand Up @@ -339,6 +339,13 @@ export default class Tokenizer {
}
}

/**
 * The start of the current section, shifted by the buffer offset.
 *
 * @returns The sum of `sectionStart` and `bufferOffset`.
 */
public getAbsoluteSectionStart(): number {
    // NOTE(review): bufferOffset presumably counts data already dropped from the buffer — confirm.
    const { sectionStart, bufferOffset } = this;
    return sectionStart + bufferOffset;
}

/**
* The current index within all of the written data.
*/
Expand Down

0 comments on commit 5ab080e

Please sign in to comment.