From 78af88d7b480823a1f789c7d5a7e94573694ff65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Fri, 27 Aug 2021 13:31:59 +0100
Subject: [PATCH] fix(tokenizer): Don't lose data on `.pause` (#927)
---
src/Tokenizer.spec.ts | 24 ++++++++
src/Tokenizer.ts | 70 ++++++++++++------------
src/__snapshots__/Tokenizer.spec.ts.snap | 16 ++++++
3 files changed, 75 insertions(+), 35 deletions(-)
diff --git a/src/Tokenizer.spec.ts b/src/Tokenizer.spec.ts
index 5a71ee48f..8f5672cf4 100644
--- a/src/Tokenizer.spec.ts
+++ b/src/Tokenizer.spec.ts
@@ -41,4 +41,28 @@ describe("Tokenizer", () => {
expect(tokenize("
")).toMatchSnapshot();
});
});
+
+ it("should not lose data when pausing", () => {
+ const log: unknown[][] = [];
+ const tokenizer = new Tokenizer(
+ {},
+ new Proxy({} as any, {
+ get(_, prop) {
+ return (...args: unknown[]) => {
+ if (prop === "ontext") {
+ tokenizer.pause();
+ }
+ log.push([prop, ...args]);
+ };
+ },
+ })
+ );
+
+ tokenizer.write("&amp; it up!");
+ tokenizer.resume();
+ tokenizer.resume();
+ tokenizer.end();
+
+ expect(log).toMatchSnapshot();
+ });
});
diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index a606e3a45..b88278b64 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -817,30 +817,27 @@ export default class Tokenizer {
return !this.xmlMode && this.baseState === State.Text;
}
+ /**
+ * Remove data that has already been consumed from the buffer.
+ */
private cleanup() {
- if (this.sectionStart < 0) {
- this.buffer = "";
- this.bufferOffset += this._index;
- this._index = 0;
- } else if (this.running) {
- if (this._state === State.Text) {
- if (this.sectionStart !== this._index) {
- this.cbs.ontext(this.buffer.substr(this.sectionStart));
- }
- this.buffer = "";
- this.bufferOffset += this._index;
- this._index = 0;
- } else if (this.sectionStart === this._index) {
- // The section just started
- this.buffer = "";
- this.bufferOffset += this._index;
- this._index = 0;
- } else {
- // Remove everything unnecessary
- this.buffer = this.buffer.substr(this.sectionStart);
- this._index -= this.sectionStart;
- this.bufferOffset += this.sectionStart;
- }
+ // If we are inside of text, emit what we already have.
+ if (
+ this.running &&
+ this._state === State.Text &&
+ this.sectionStart !== this._index
+ ) {
+ // TODO: We could emit attribute data here as well.
+ this.cbs.ontext(this.buffer.substr(this.sectionStart));
+ this.sectionStart = this._index;
+ }
+
+ const start = this.sectionStart < 0 ? this._index : this.sectionStart;
+ this.buffer = this.buffer.substr(start);
+ this._index -= start;
+ this.bufferOffset += start;
+
+ if (this.sectionStart > 0) {
this.sectionStart = 0;
}
}
@@ -1000,6 +997,7 @@ export default class Tokenizer {
this.cbs.onend();
}
+ /** Handle any trailing data. */
private handleTrailingData() {
const data = this.buffer.substr(this.sectionStart);
if (
@@ -1029,21 +1027,23 @@ export default class Tokenizer {
this.decodeNumericEntity(16, false);
// All trailing data will have been consumed
} else if (
- this._state !== State.InTagName &&
- this._state !== State.BeforeAttributeName &&
- this._state !== State.BeforeAttributeValue &&
- this._state !== State.AfterAttributeName &&
- this._state !== State.InAttributeName &&
- this._state !== State.InAttributeValueSq &&
- this._state !== State.InAttributeValueDq &&
- this._state !== State.InAttributeValueNq &&
- this._state !== State.InClosingTagName
+ this._state === State.InTagName ||
+ this._state === State.BeforeAttributeName ||
+ this._state === State.BeforeAttributeValue ||
+ this._state === State.AfterAttributeName ||
+ this._state === State.InAttributeName ||
+ this._state === State.InAttributeValueSq ||
+ this._state === State.InAttributeValueDq ||
+ this._state === State.InAttributeValueNq ||
+ this._state === State.InClosingTagName
) {
+ /*
+ * If we are currently in an opening or closing tag, deliberately not
+ * calling the respective callback signals that the tag should be ignored.
+ */
+ } else {
this.cbs.ontext(data);
}
- /*
- * TODO add a way to remove current tag
- */
}
private getSection(): string {
diff --git a/src/__snapshots__/Tokenizer.spec.ts.snap b/src/__snapshots__/Tokenizer.spec.ts.snap
index 5b8d9202c..38a60dfff 100644
--- a/src/__snapshots__/Tokenizer.spec.ts.snap
+++ b/src/__snapshots__/Tokenizer.spec.ts.snap
@@ -1,5 +1,21 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
+exports[`Tokenizer should not lose data when pausing 1`] = `
+Array [
+ Array [
+ "ontext",
+ "&",
+ ],
+ Array [
+ "ontext",
+ " it up!",
+ ],
+ Array [
+ "onend",
+ ],
+]
+`;
+
exports[`Tokenizer should support self-closing special tags for self-closing script tag 1`] = `
Array [
Array [