Skip to content

Commit

Permalink
Merge f1bf302 into 5edc8df
Browse files: browse the repository at this point in the history
  • Loading branch information
fb55 committed Apr 8, 2023
2 parents 5edc8df + f1bf302 commit f13f143
Show file tree
Hide file tree
Showing 5 changed files with 767 additions and 116 deletions.
237 changes: 237 additions & 0 deletions src/decode.spec.ts
Expand Up @@ -13,6 +13,7 @@ describe("Decode test", () => {
{ input: ":", output: ":" },
{ input: ":", output: ":" },
{ input: ":", output: ":" },
{ input: "&#", output: "&#" },
{ input: "&>", output: "&>" },
{ input: "id=770&#anchor", output: "id=770&#anchor" },
];
Expand Down Expand Up @@ -42,4 +43,240 @@ describe("Decode test", () => {

// "&nbsp" has no terminating semicolon here and is immediately followed by
// "<"; it is still expected to decode to U+00A0 (the test name points at #852).
it("should parse &nbsp followed by < (#852)", () => {
    const decoded = entities.decodeHTML("&nbsp<");

    expect(decoded).toBe("\u00a0<");
});

// The first reference is terminated by a semicolon and matches the whole name
// "timesbar". The second one is not terminated, and the expected output shows
// only its "times" prefix decoded, with "bar" left as plain text.
it("should decode trailing legacy entities", () => {
    const input = "&timesbar;&timesbar";
    const expected = "⨱×bar";

    expect(entities.decodeHTML(input)).toBe(expected);
});

// A single named reference whose expected decoding is the multi-character
// string asserted below.
it("should decode multi-byte entities", () => {
    const decoded = entities.decodeHTML("&NotGreaterFullEqual;");

    expect(decoded).toBe("≧̸");
});

// In attribute mode, a semicolon-less "&not" is expected to decode only when
// nothing follows it; when "=", a letter or a digit follows, the input is
// expected to come back unchanged.
it("should not decode legacy entities followed by text in attribute mode", () => {
    const attributeMode = entities.DecodingMode.Attribute;

    // Cases going through `decodeHTML` with an explicit decoding mode.
    const explicitModeCases: [string, string][] = [
        ["&not", "¬"],
        ["&noti", "&noti"],
        ["&not=", "&not="],
    ];
    for (const [input, expected] of explicitModeCases) {
        expect(entities.decodeHTML(input, attributeMode)).toBe(expected);
    }

    // Cases going through the `decodeHTMLAttribute` shorthand; each input is
    // expected to be returned as-is.
    for (const untouched of ["&notp", "&notP", "&not3"]) {
        expect(entities.decodeHTMLAttribute(untouched)).toBe(untouched);
    }
});
});

/**
 * Exercises `entities.EntityDecoder` directly, feeding it input through one or
 * more `write` calls and finishing with `end` where needed.
 *
 * What the assertions below rely on:
 * - `write` yields `-1` while the entity is still incomplete, otherwise the
 *   number of characters the entity consumed.
 * - `end` yields the consumed length of a pending entity, or `0` when nothing
 *   could be decoded.
 * - The decode callback receives the decoded code point and that same length.
 */
describe("EntityDecoder", () => {
    const COLON = ":".charCodeAt(0);
    const AMPERSAND = "&".charCodeAt(0);

    /** Creates a decoder over the HTML decode tree plus its mock decode callback. */
    const createDecoder = () => {
        const onDecoded = jest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            onDecoded
        );

        return { decoder, onDecoded };
    };

    it("should decode decimal entities", () => {
        const { decoder, onDecoded } = createDecoder();

        // The entity is split over two chunks; offset 1 skips the "&" of the first.
        const firstChunkResult = decoder.write("&#5", 1);
        expect(firstChunkResult).toBe(-1);
        const secondChunkResult = decoder.write("8;", 0);
        expect(secondChunkResult).toBe(5);

        expect(onDecoded).toHaveBeenCalledTimes(1);
        expect(onDecoded).toHaveBeenCalledWith(COLON, 5);
    });

    it("should decode hex entities", () => {
        const { decoder, onDecoded } = createDecoder();

        const consumed = decoder.write("&#x3a;", 1);
        expect(consumed).toBe(6);

        expect(onDecoded).toHaveBeenCalledTimes(1);
        expect(onDecoded).toHaveBeenCalledWith(COLON, 6);
    });

    it("should decode named entities", () => {
        const { decoder, onDecoded } = createDecoder();

        const consumed = decoder.write("&amp;", 1);
        expect(consumed).toBe(5);

        expect(onDecoded).toHaveBeenCalledTimes(1);
        expect(onDecoded).toHaveBeenCalledWith(AMPERSAND, 5);
    });

    it("should decode legacy entities", () => {
        const { decoder, onDecoded } = createDecoder();
        decoder.startEntity(entities.DecodingMode.Legacy);

        // Without a semicolon nothing is reported until the input is ended.
        const pending = decoder.write("&amp", 1);
        expect(pending).toBe(-1);

        expect(onDecoded).toHaveBeenCalledTimes(0);

        const consumedAtEnd = decoder.end();
        expect(consumedAtEnd).toBe(4);

        expect(onDecoded).toHaveBeenCalledTimes(1);
        expect(onDecoded).toHaveBeenCalledWith(AMPERSAND, 4);
    });

    it("should decode named entity written character by character", () => {
        const { decoder, onDecoded } = createDecoder();

        // No "&" is ever written here, yet the expected length below is 5.
        for (const char of "amp") {
            const pending = decoder.write(char, 0);
            expect(pending).toBe(-1);
        }
        const consumed = decoder.write(";", 0);
        expect(consumed).toBe(5);

        expect(onDecoded).toHaveBeenCalledTimes(1);
        expect(onDecoded).toHaveBeenCalledWith(AMPERSAND, 5);
    });

    it("should decode numeric entity written character by character", () => {
        const { decoder, onDecoded } = createDecoder();

        // No "&" is ever written here, yet the expected length below is 6.
        for (const char of "#x3a") {
            const pending = decoder.write(char, 0);
            expect(pending).toBe(-1);
        }
        const consumed = decoder.write(";", 0);
        expect(consumed).toBe(6);

        expect(onDecoded).toHaveBeenCalledTimes(1);
        expect(onDecoded).toHaveBeenCalledWith(COLON, 6);
    });

    it("should not fail if nothing is written", () => {
        const { decoder, onDecoded } = createDecoder();

        const consumedAtEnd = decoder.end();
        expect(consumedAtEnd).toBe(0);
        expect(onDecoded).toHaveBeenCalledTimes(0);
    });

    describe("errors", () => {
        /**
         * Creates a decoder that is additionally wired to a fresh set of mock
         * error handlers, and returns all three pieces for inspection.
         */
        const createDecoderWithErrorHandlers = () => {
            const errorHandlers = {
                missingSemicolonAfterCharacterReference: jest.fn(),
                absenceOfDigitsInNumericCharacterReference: jest.fn(),
                validateNumericCharacterReference: jest.fn(),
            };
            const onDecoded = jest.fn();
            const decoder = new entities.EntityDecoder(
                entities.htmlDecodeTree,
                onDecoded,
                errorHandlers
            );

            return { decoder, onDecoded, errorHandlers };
        };

        it("should produce an error for a named entity without a semicolon", () => {
            const { decoder, onDecoded, errorHandlers } =
                createDecoderWithErrorHandlers();
            const missingSemicolon =
                errorHandlers.missingSemicolonAfterCharacterReference;

            // Control case: with the semicolon present no error is expected.
            decoder.startEntity(entities.DecodingMode.Legacy);
            const consumedWithSemicolon = decoder.write("&amp;", 1);
            expect(consumedWithSemicolon).toBe(5);
            expect(onDecoded).toHaveBeenCalledTimes(1);
            expect(onDecoded).toHaveBeenCalledWith(AMPERSAND, 5);
            expect(missingSemicolon).toHaveBeenCalledTimes(0);

            // Same entity without the semicolon, flushed through `end`.
            decoder.startEntity(entities.DecodingMode.Legacy);
            const pending = decoder.write("&amp", 1);
            expect(pending).toBe(-1);
            const consumedAtEnd = decoder.end();
            expect(consumedAtEnd).toBe(4);

            expect(onDecoded).toHaveBeenCalledTimes(2);
            expect(onDecoded).toHaveBeenLastCalledWith(AMPERSAND, 4);
            expect(missingSemicolon).toHaveBeenCalledTimes(1);
        });

        it("should produce an error for a numeric entity without a semicolon", () => {
            const { decoder, onDecoded, errorHandlers } =
                createDecoderWithErrorHandlers();
            const {
                missingSemicolonAfterCharacterReference: missingSemicolon,
                absenceOfDigitsInNumericCharacterReference: absenceOfDigits,
                validateNumericCharacterReference: validateNumeric,
            } = errorHandlers;

            decoder.startEntity(entities.DecodingMode.Legacy);
            const pending = decoder.write("&#x3a", 1);
            expect(pending).toBe(-1);
            const consumedAtEnd = decoder.end();
            expect(consumedAtEnd).toBe(5);

            expect(onDecoded).toHaveBeenCalledTimes(1);
            expect(onDecoded).toHaveBeenCalledWith(0x3a, 5);
            expect(missingSemicolon).toHaveBeenCalledTimes(1);
            expect(absenceOfDigits).toHaveBeenCalledTimes(0);
            expect(validateNumeric).toHaveBeenCalledTimes(1);
            expect(validateNumeric).toHaveBeenCalledWith(0x3a);
        });

        it("should produce an error for numeric entities without digits", () => {
            const { decoder, onDecoded, errorHandlers } =
                createDecoderWithErrorHandlers();
            const {
                missingSemicolonAfterCharacterReference: missingSemicolon,
                absenceOfDigitsInNumericCharacterReference: absenceOfDigits,
                validateNumericCharacterReference: validateNumeric,
            } = errorHandlers;

            decoder.startEntity(entities.DecodingMode.Legacy);
            const pending = decoder.write("&#", 1);
            expect(pending).toBe(-1);
            const consumedAtEnd = decoder.end();
            expect(consumedAtEnd).toBe(0);

            // Nothing is decoded; only the "no digits" handler is expected to fire.
            expect(onDecoded).toHaveBeenCalledTimes(0);
            expect(missingSemicolon).toHaveBeenCalledTimes(0);
            expect(absenceOfDigits).toHaveBeenCalledTimes(1);
            expect(absenceOfDigits).toHaveBeenCalledWith(2);
            expect(validateNumeric).toHaveBeenCalledTimes(0);
        });

        it("should produce an error for hex entities without digits", () => {
            const { decoder, onDecoded, errorHandlers } =
                createDecoderWithErrorHandlers();
            const {
                missingSemicolonAfterCharacterReference: missingSemicolon,
                absenceOfDigitsInNumericCharacterReference: absenceOfDigits,
                validateNumericCharacterReference: validateNumeric,
            } = errorHandlers;

            decoder.startEntity(entities.DecodingMode.Legacy);
            const pending = decoder.write("&#x", 1);
            expect(pending).toBe(-1);
            const consumedAtEnd = decoder.end();
            expect(consumedAtEnd).toBe(0);

            // Nothing is decoded; only the "no digits" handler is expected to fire.
            expect(onDecoded).toHaveBeenCalledTimes(0);
            expect(missingSemicolon).toHaveBeenCalledTimes(0);
            expect(absenceOfDigits).toHaveBeenCalledTimes(1);
            expect(validateNumeric).toHaveBeenCalledTimes(0);
        });
    });
});

0 comments on commit f13f143

Please sign in to comment.