refactor(tokenizer): Use EntityDecoder (#1480)
fb55 committed Apr 13, 2023
1 parent c621d56 commit 11b6af2
Showing 6 changed files with 255 additions and 273 deletions.
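
The commit switches the Tokenizer over to the EntityDecoder shipped with entities ^4.5.0 (the dependency bump below). A rough sketch of how a tokenizer might wire it up, assuming the API exported from entities/lib/decode.js (the decode trees, DecodingMode, and the startEntity/write methods); the surrounding tokenizer shape is illustrative only, not the actual htmlparser2 implementation:

    import {
        DecodingMode,
        EntityDecoder,
        htmlDecodeTree,
        xmlDecodeTree,
    } from "entities/lib/decode.js";

    class EntityAwareTokenizer {
        private readonly entityDecoder: EntityDecoder;

        constructor(private readonly xmlMode: boolean) {
            this.entityDecoder = new EntityDecoder(
                xmlMode ? xmlDecodeTree : htmlDecodeTree,
                // Invoked once the decoder has resolved a complete entity.
                (cp, consumed) => this.emitCodePoint(cp, consumed)
            );
        }

        /** Called when a "&" is seen in text or attribute data. */
        startEntity(inAttribute: boolean): void {
            this.entityDecoder.startEntity(
                this.xmlMode
                    ? DecodingMode.Strict
                    : inAttribute
                      ? DecodingMode.Attribute
                      : DecodingMode.Legacy
            );
        }

        /** Feeds buffered input (after the "&") to the decoder. */
        feedEntity(buffer: string, offset: number): number {
            // A negative result is taken to mean "incomplete, wait for more input".
            return this.entityDecoder.write(buffer, offset);
        }

        private emitCodePoint(cp: number, consumed: number): void {
            // The real tokenizer would forward this to its callbacks (e.g. ontextentity),
            // passing an end index derived from the consumed length.
            console.log(`decoded U+${cp.toString(16)} after ${consumed} characters`);
        }
    }
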
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -64,7 +64,7 @@
     "domelementtype": "^2.3.0",
     "domhandler": "^5.0.3",
     "domutils": "^3.0.1",
-    "entities": "^4.4.0"
+    "entities": "^4.5.0"
   },
   "devDependencies": {
     "@types/jest": "^29.5.0",
11 changes: 3 additions & 8 deletions src/Parser.ts
@@ -251,15 +251,10 @@ export class Parser implements Callbacks {
     }
 
     /** @internal */
-    ontextentity(cp: number): void {
-        /*
-         * Entities can be emitted on the character, or directly after.
-         * We use the section start here to get accurate indices.
-         */
-        const index = this.tokenizer.getSectionStart();
-        this.endIndex = index - 1;
+    ontextentity(cp: number, endIndex: number): void {
+        this.endIndex = endIndex - 1;
         this.cbs.ontext?.(fromCodePoint(cp));
-        this.startIndex = index;
+        this.startIndex = endIndex;
     }
 
     protected isVoidElement(name: string): boolean {
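
With this refactor the tokenizer reports the entity's end index directly, so the parser no longer needs to query getSectionStart(); the decoded code point is still surfaced through the regular ontext callback. A minimal consumer-side example using the public htmlparser2 Parser API (behaviour for consumers is unchanged by this commit):

    import { Parser } from "htmlparser2";

    const parser = new Parser({
        ontext(text) {
            // "&amp;" arrives already decoded; the decoded character is emitted
            // as its own text chunk, so this typically logs "fish ", "&", " chips".
            console.log(JSON.stringify(text));
        },
    });

    parser.write("fish &amp; chips");
    parser.end();
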
29 changes: 26 additions & 3 deletions src/Tokenizer.spec.ts
@@ -1,10 +1,10 @@
 import { Tokenizer } from "./index.js";
 import type { Callbacks } from "./Tokenizer.js";
 
-function tokenize(data: string) {
+function tokenize(data: string, options = {}) {
     const log: unknown[][] = [];
     const tokenizer = new Tokenizer(
-        {},
+        options,
         new Proxy(
             {},
             {
@@ -56,6 +56,28 @@ describe("Tokenizer", () => {
         });
     });
 
+    describe("should handle entities", () => {
+        it("for XML entities", () =>
+            expect(
+                tokenize("&>&amp<üa&#x62c&#100&#101", {
+                    xmlMode: true,
+                })
+            ).toMatchSnapshot());
+
+        it("for entities in attributes (#276)", () =>
+            expect(
+                tokenize(
+                    '<img src="?&image_uri=1&&image;=2&image=3"/>?&image_uri=1&&image;=2&image=3'
+                )
+            ).toMatchSnapshot());
+
+        it("for trailing legacy entity", () =>
+            expect(tokenize("&timesbar;&timesbar")).toMatchSnapshot());
+
+        it("for multi-byte entities", () =>
+            expect(tokenize("&NotGreaterFullEqual;")).toMatchSnapshot());
+    });
+
     it("should not lose data when pausing", () => {
         const log: unknown[][] = [];
         const tokenizer = new Tokenizer(
@@ -75,7 +97,8 @@ describe("Tokenizer", () => {
             ) as Callbacks
         );
 
-        tokenizer.write("&amp; it up!");
+        tokenizer.write("&am");
+        tokenizer.write("p; it up!");
         tokenizer.resume();
         tokenizer.resume();
 
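
The rewritten pause test splits "&amp;" across two write() calls, which is exactly the case the streaming decoder is meant to handle: it can consume a partial entity, report that it needs more input, and pick up where it left off on the next chunk. A standalone sketch under the same assumptions as above (the decoder is fed the text after the "&", as entities' own decodeHTML helper does; a negative return from write() is assumed to mean "incomplete"):

    import { DecodingMode, EntityDecoder, htmlDecodeTree } from "entities/lib/decode.js";

    const decoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) =>
        console.log(`decoded code point ${cp} after ${consumed} characters`)
    );

    decoder.startEntity(DecodingMode.Legacy);
    // First chunk ends mid-entity ("&am"); write() is assumed to signal "need more data".
    decoder.write("am", 0);
    // Second chunk completes "&amp;"; the callback fires with the code point of "&".
    decoder.write("p; it up!", 0);
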