Skip to content

Commit

Permalink
feat: Add new API for encode, decode functions
Browse files Browse the repository at this point in the history
The default mode of `decode` will be updated to `HTML` in the next major version.
  • Loading branch information
fb55 committed Jul 10, 2021
1 parent e1a28aa commit 817ae67
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 20 deletions.
14 changes: 11 additions & 3 deletions src/encode.spec.ts
Expand Up @@ -31,6 +31,10 @@ describe("Encode->decode test", () => {
expect(encodedHTML5).toBe(html));
it(`should HTML5 decode ${encodedHTML5}`, () =>
expect(entities.decodeHTML(encodedHTML5)).toBe(input));
it("should encode emojis", () =>
    // The expected value is the hexadecimal numeric character reference of
    // each emoji; an expectation equal to the input would not test encoding.
    expect(entities.encodeHTML5("😄🍾🥳💥😇")).toBe(
        "&#x1F604;&#x1F37E;&#x1F973;&#x1F4A5;&#x1F607;"
    ));
}

it("should encode data URIs (issue #16)", () => {
Expand All @@ -41,9 +45,13 @@ describe("Encode->decode test", () => {
});

describe("encodeNonAsciiHTML", () => {
it("should encode all non-ASCII characters", () => {
it("should encode all non-ASCII characters", () =>
expect(entities.encodeNonAsciiHTML("<test> #123! übermaßen")).toBe(
"&lt;test&gt; #123! &uuml;berma&szlig;en"
);
});
));

it("should encode emojis", () =>
expect(entities.encodeNonAsciiHTML("😄🍾🥳💥😇")).toBe(
"&#x1F604;&#x1F37E;&#x1F973;&#x1F4A5;&#x1F607;"
));
});
30 changes: 26 additions & 4 deletions src/index.spec.ts
Expand Up @@ -16,6 +16,9 @@ describe("Documents", () => {
for (const e of Object.keys(doc)) {
for (let l = i; l < levels.length; l++) {
expect(entities.decode(`&${e};`, l)).toBe(doc[e]);
expect(entities.decode(`&${e};`, { level: l })).toBe(
doc[e]
);
}
}
});
Expand All @@ -26,6 +29,12 @@ describe("Documents", () => {
for (const e of Object.keys(doc)) {
for (let l = i; l < levels.length; l++) {
expect(entities.decodeStrict(`&${e};`, l)).toBe(doc[e]);
expect(
entities.decode(`&${e};`, {
level: l,
mode: entities.DecodingMode.Strict,
})
).toBe(doc[e]);
}
}
});
Expand All @@ -41,15 +50,28 @@ describe("Documents", () => {
}
}
});

it("should only encode non-ASCII values if asked", () =>
expect(
entities.encode("Great #'s of 🎁", {
level: i,
mode: entities.EncodingMode.ASCII,
})
).toBe("Great #&apos;s of &#x1F381;"));
});
}

describe("Legacy", () => {
const legacyMap: Record<string, string> = legacy;
it("should decode", () => {
for (const e of Object.keys(legacy)) {
expect(entities.decodeHTML(`&${e}`)).toBe(
(legacy as Record<string, string>)[e]
);
for (const e of Object.keys(legacyMap)) {
expect(entities.decodeHTML(`&${e}`)).toBe(legacyMap[e]);
expect(
entities.decodeStrict(`&${e}`, {
level: entities.EntityLevel.HTML,
mode: entities.DecodingMode.Legacy,
})
).toBe(legacyMap[e]);
}
});
});
Expand Down
137 changes: 124 additions & 13 deletions src/index.ts
@@ -1,37 +1,148 @@
import { decodeXML, decodeHTML, decodeHTMLStrict } from "./decode";
import { encodeXML, encodeHTML } from "./encode";
import { encodeXML, encodeHTML, encodeNonAsciiHTML } from "./encode";

/**
* The level of entities to support.
*
* The numeric values are the ones accepted by the legacy numeric `level`
* argument of `decode`, `decodeStrict` and `encode` (0 = XML, 1 = HTML).
*/
export enum EntityLevel {
/** Support only XML entities. */
XML = 0,
/** Support HTML entities, which are a superset of XML entities. */
HTML = 1,
}

/**
* Determines whether some entities are allowed to be written without a trailing `;`.
*
* `decode` and `decodeStrict` only look at this when the level is
* `EntityLevel.HTML`; at the XML level they always call `decodeXML`.
*/
export enum DecodingMode {
/** Support legacy HTML entities (entities written without a trailing `;`). */
Legacy = 0,
/** Do not support legacy HTML entities. */
Strict = 1,
}

/**
 * Selects how much of the input `encode` should escape.
 *
 * Member values are written out explicitly so that they stay stable if
 * members are ever reordered.
 */
export enum EncodingMode {
    /**
     * The output is UTF-8 encoded. Only characters that need escaping within
     * HTML will be escaped.
     */
    UTF8 = 0,
    /**
     * The output consists only of ASCII characters. Characters that need
     * escaping within HTML, and characters that aren't ASCII characters will
     * be escaped.
     */
    ASCII = 1,
    /**
     * Encode all characters that have an equivalent entity, as well as all
     * characters that are not ASCII characters.
     */
    Extensive = 2,
}

/**
 * Options for `decode` and `decodeStrict`.
 *
 * Exported so that callers can name the options type of those functions.
 */
export interface DecodingOptions {
    /**
     * The level of entities to support.
     * @default EntityLevel.XML
     */
    level?: EntityLevel;
    /**
     * Decoding mode. If `Legacy`, will support legacy entities not terminated
     * with a semicolon (`;`).
     *
     * Always `Strict` for XML. For HTML, set this to `DecodingMode.Strict` if
     * you are parsing an attribute value.
     *
     * The deprecated `decodeStrict` function defaults this to `Strict`.
     *
     * @default DecodingMode.Legacy
     */
    mode?: DecodingMode;
}

/**
* Decodes a string with entities.
*
* @param data String to decode.
* @param level Optional level to decode at. 0 = XML, 1 = HTML. Default is 0.
* @deprecated Use `decodeXML` or `decodeHTML` directly.
* @param options Decoding options.
*/
export function decode(data: string, level?: number): string {
return (!level || level <= 0 ? decodeXML : decodeHTML)(data);
/**
 * Decodes a string with entities.
 *
 * @param data String to decode.
 * @param options Either a `DecodingOptions` object or, for the legacy call
 *   style, a bare `EntityLevel`. Defaults to `EntityLevel.XML`.
 * @returns The output of `decodeXML`, `decodeHTML` or `decodeHTMLStrict`,
 *   depending on the requested level and mode.
 */
export function decode(
    data: string,
    options: DecodingOptions | EntityLevel = EntityLevel.XML
): string {
    // A bare number is the legacy `level` argument; it carries no mode.
    const settings: DecodingOptions =
        typeof options === "number" ? { level: options } : options;

    // Anything other than the HTML level is decoded as XML; the mode is
    // not consulted in that case.
    if (settings.level !== EntityLevel.HTML) {
        return decodeXML(data);
    }

    // For HTML, legacy decoding is used unless `Strict` was requested.
    return settings.mode === DecodingMode.Strict
        ? decodeHTMLStrict(data)
        : decodeHTML(data);
}

/**
* Decodes a string with entities. Does not allow missing trailing semicolons for entities.
*
* @param data String to decode.
* @param level Optional level to decode at. 0 = XML, 1 = HTML. Default is 0.
* @deprecated Use `decodeHTMLStrict` or `decodeXML` directly.
* @param options Decoding options.
* @deprecated Use `decode` with the `mode` set to `Strict`.
*/
export function decodeStrict(data: string, level?: number): string {
return (!level || level <= 0 ? decodeXML : decodeHTMLStrict)(data);
/**
 * Decodes a string with entities. Does not allow missing trailing semicolons
 * for entities unless `mode` is explicitly set to `DecodingMode.Legacy`.
 *
 * @param data String to decode.
 * @param options Either a `DecodingOptions` object or, for the legacy call
 *   style, a bare `EntityLevel`. Defaults to `EntityLevel.XML`.
 * @returns The output of `decodeXML`, `decodeHTMLStrict` or `decodeHTML`,
 *   depending on the requested level and mode.
 * @deprecated Use `decode` with the `mode` set to `Strict`.
 */
export function decodeStrict(
    data: string,
    options: DecodingOptions | EntityLevel = EntityLevel.XML
): string {
    // A bare number is the legacy `level` argument; it carries no mode.
    const settings: DecodingOptions =
        typeof options === "number" ? { level: options } : options;

    // Anything other than the HTML level is decoded as XML; the mode is
    // not consulted in that case.
    if (settings.level !== EntityLevel.HTML) {
        return decodeXML(data);
    }

    // For HTML, strict decoding is used unless `Legacy` was requested.
    return settings.mode === DecodingMode.Legacy
        ? decodeHTML(data)
        : decodeHTMLStrict(data);
}

/**
* Options for `encode`.
*
* Both fields are optional; omitted fields fall back to the defaults
* documented on each field.
*/
export interface EncodingOptions {
/**
* The level of entities to support.
* @default EntityLevel.XML
*/
level?: EntityLevel;
/**
* Output format.
*
* NOTE: `encode` currently only distinguishes `EncodingMode.ASCII`, and only
* at the HTML level; `EncodingMode.UTF8` is not yet handled (see the TODOs
* in `encode`).
*
* @default EncodingMode.Extensive
*/
mode?: EncodingMode;
}

/**
* Encodes a string with entities.
*
* @param data String to encode.
* @param level Optional level to encode at. 0 = XML, 1 = HTML. Default is 0.
* @deprecated Use `encodeHTML`, `encodeXML` or `encodeNonAsciiHTML` directly.
* @param options Encoding options.
*/
export function encode(data: string, level?: number): string {
return (!level || level <= 0 ? encodeXML : encodeHTML)(data);
/**
 * Encodes a string with entities.
 *
 * @param data String to encode.
 * @param options Either an `EncodingOptions` object or, for the legacy call
 *   style, a bare `EntityLevel`. Defaults to `EntityLevel.XML`.
 * @returns The output of `encodeXML`, `encodeHTML` or `encodeNonAsciiHTML`,
 *   depending on the requested level and mode.
 */
export function encode(
    data: string,
    options: EncodingOptions | EntityLevel = EntityLevel.XML
): string {
    // A bare number is the legacy `level` argument; it carries no mode.
    const settings: EncodingOptions =
        typeof options === "number" ? { level: options } : options;

    if (settings.level !== EntityLevel.HTML) {
        // TODO Support opts.mode === 'UTF8'

        // ASCII and Extensive are equivalent
        return encodeXML(data);
    }

    // TODO Support opts.mode === 'UTF8'

    // At the HTML level only `ASCII` selects a different encoder; every
    // other mode (including an omitted one) uses `encodeHTML`.
    return settings.mode === EncodingMode.ASCII
        ? encodeNonAsciiHTML(data)
        : encodeHTML(data);
}

export {
Expand Down

0 comments on commit 817ae67

Please sign in to comment.