Skip to content

Commit

Permalink
refactor: Share escaping logic between escape fns
Browse files Browse the repository at this point in the history
And narrow the maps for `escapeAttribute` and `escapeText`.
  • Loading branch information
fb55 committed Mar 31, 2022
1 parent 0af5d6d commit 2a2bc5f
Showing 1 changed file with 43 additions and 43 deletions.
86 changes: 43 additions & 43 deletions src/encode.ts
Expand Up @@ -2,10 +2,6 @@ import { encodeHTMLTrieRe, getCodePoint } from "./encode-trie";

// Global pattern matching tab, line feed, form feed, the ASCII ranges `!`..`,`, `.`, `/`, `:`..`@`, `[`..backtick and `{`..`}`, and every code unit from U+0080 to U+FFFF.
const htmlReplacer = /[\t\n!-,./:-@[-`\f{-}$\x80-\uFFFF]/g;
// Global pattern matching `"`, `&`, `'`, `<`, `>`, `$` and every code unit from U+0080 to U+FFFF.
const xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
// Global pattern matching `&`, `<`, `>`, `'` and `"`; scanned with `exec` by the function form of `escapeUTF8`.
const xmlInvalidChars = /[&<>'"]/g;

// Global pattern matching `&`, `<`, `>` and U+00A0; passed to `String.prototype.replace` by the function form of `escapeText`.
const textReplacer = /[&<>\u00A0]/g;
// Global pattern matching `"`, `&` and U+00A0; passed to `String.prototype.replace` by the function form of `escapeAttribute`.
const attrReplacer = /["&\u00A0]/g;

const xmlCodeMap = new Map([
[34, "&quot;"],
Expand All @@ -15,14 +11,6 @@ const xmlCodeMap = new Map([
[62, "&gt;"],
]);

/**
 * Entity strings used by `escapeAttribute` and `escapeText`, keyed by the
 * UTF-16 char code of the character they replace.
 */
const htmlEscapeCodeMap = new Map<number, string>([
    [0x22, "&quot;"], // "
    [0x26, "&amp;"], // &
    [0x3c, "&lt;"], // <
    [0x3e, "&gt;"], // >
    [0xa0, "&nbsp;"], // U+00A0
]);

/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities.
Expand Down Expand Up @@ -93,55 +81,67 @@ export function encodeNonAsciiHTML(data: string): string {
*/
export const escape = encodeXML;

/**
 * Creates an escaping function that replaces single characters with the
 * strings stored in `map`.
 *
 * @param regex Pattern selecting the characters to replace. Every match is
 *     expected to be exactly one UTF-16 code unit long. The pattern is copied,
 *     so the caller's `lastIndex` is neither read nor modified.
 * @param map Replacement strings, keyed by the char code of the matched
 *     character.
 * @returns A function returning its input with every matched character
 *     replaced; unmatched text is copied through unchanged.
 */
function getEscaper(
    regex: RegExp,
    map: Map<number, string>
): (data: string) => string {
    /*
     * Work on a private, always-global copy of the pattern. Without the `g`
     * flag `exec` never advances and the loop below would spin forever, and
     * sharing the caller's instance would let a stale `lastIndex` skip the
     * beginning of the input.
     */
    const pattern = new RegExp(
        regex.source,
        regex.global ? regex.flags : `${regex.flags}g`
    );

    return function escape(data: string): string {
        let match: RegExpExecArray | null;
        let lastIdx = 0;
        let result = "";

        // Always scan from the start of `data`.
        pattern.lastIndex = 0;

        while ((match = pattern.exec(data)) !== null) {
            if (match[0] === "") {
                // A zero-width match does not advance `lastIndex`; step manually.
                pattern.lastIndex++;
                continue;
            }

            if (lastIdx !== match.index) {
                result += data.substring(lastIdx, match.index);
            }

            const code = match[0].charCodeAt(0);
            const replacement = map.get(code);

            /*
             * The character is expected to be in the map. If it is not, emit a
             * numeric character reference instead of the text "undefined".
             */
            result += replacement !== undefined ? replacement : `&#${code};`;

            // Every match is expected to be of length 1.
            lastIdx = match.index + 1;
        }

        return result + data.substring(lastIdx);
    };
}

/**
* Encodes all characters not valid in XML documents using XML entities.
*
* Note that the output will be character-set dependent.
*
* @param data String to escape.
*/
export function escapeUTF8(data: string): string {
    let escaped = "";
    let cursor = 0;

    for (
        let hit = xmlInvalidChars.exec(data);
        hit !== null;
        hit = xmlInvalidChars.exec(data)
    ) {
        // Copy the untouched run between the previous hit and this one.
        escaped += data.substring(cursor, hit.index);

        // The matched character is expected to have an entry in `xmlCodeMap`.
        escaped += xmlCodeMap.get(hit[0].charCodeAt(0))!;

        // Each hit is a single character, so resume right after it.
        cursor = hit.index + 1;
    }

    // Append whatever follows the final hit (the whole input if none matched).
    return escaped + data.substring(cursor);
}
// Built by `getEscaper`: each `&`, `<`, `>`, `'` or `"` in the input is replaced with the string `xmlCodeMap` holds for its char code; all other text is copied through unchanged.
export const escapeUTF8 = getEscaper(/[&<>'"]/g, xmlCodeMap);

/**
* Encodes all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export function escapeAttribute(data: string): string {
    // Maps one matched character to its entity; `attrReplacer` only matches
    // characters that have an entry in `htmlEscapeCodeMap`.
    const toEntity = (char: string): string =>
        htmlEscapeCodeMap.get(char.charCodeAt(0))!;

    return data.replace(attrReplacer, toEntity);
}
// Built by `getEscaper`: replaces `"`, `&` and U+00A0 with the entities listed below; all other text is copied through unchanged.
export const escapeAttribute = getEscaper(
/["&\u00A0]/g,
new Map([
[34, "&quot;"], // "
[38, "&amp;"], // &
[160, "&nbsp;"], // U+00A0
])
);

/**
* Encodes all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export function escapeText(data: string): string {
    return data.replace(textReplacer, function toEntity(char: string): string {
        // `textReplacer` only matches characters that have an entry in
        // `htmlEscapeCodeMap`, so the lookup is expected to succeed.
        const code = char.charCodeAt(0);
        return htmlEscapeCodeMap.get(code)!;
    });
}
// Built by `getEscaper`: replaces `&`, `<`, `>` and U+00A0 with the entities listed below; all other text is copied through unchanged.
export const escapeText = getEscaper(
/[&<>\u00A0]/g,
new Map([
[38, "&amp;"], // &
[60, "&lt;"], // <
[62, "&gt;"], // >
[160, "&nbsp;"], // U+00A0
])
);

0 comments on commit 2a2bc5f

Please sign in to comment.