Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(regexp): add escape function #3334

Merged
merged 3 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions regexp/escape.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

// // For forward-compatibility with the regexp `v` flag, reservedCharMap is
// // autogenerated from the ClassSetReservedDoublePunctuator,
// // ClassSetSyntaxCharacter, and ClassSetReservedPunctuator categories in the
// // draft spec.
// // See https://github.com/tc39/proposal-regexp-v-flag#how-is-the-v-flag-different-from-the-u-flag
// // and https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetReservedDoublePunctuator
// const reservedChars = [...new Set(['ClassSetReservedDoublePunctuator', 'ClassSetSyntaxCharacter', 'ClassSetReservedPunctuator'].map(n =>
// document.querySelector(`[name=${n}] emu-rhs`).textContent.replaceAll(/\s/g, '')
// ).join(''))]
// const reservedCharMap = Object.fromEntries(reservedChars
// .map(x => {
// try {
// for (const flag of 'gimsuy') {
// new RegExp(`\\${x}`, flag)
// new RegExp(`[\\${x}]`, flag)
// }
// return [x, `\\${x}`]
// } catch (e) {
// return [x, `\\x${x.codePointAt(0).toString(16).padStart(2, '0')}`]
// }
// }))

/**
 * Lookup table from each reserved character to the text that replaces it
 * when escaping.
 *
 * Characters mapped to a backslash-prefixed copy of themselves (e.g. `\$`)
 * use an identity escape; the remaining ones are mapped to a two-digit
 * `\xHH` hexadecimal escape of their code point.
 *
 * Key order is kept as-is because the values are joined, in order, to build
 * the character class used to find reserved characters.
 */
const reservedCharMap = {
  "&": "\\x26",
  "!": "\\x21",
  "#": "\\x23",
  "$": "\\$",
  "%": "\\x25",
  "*": "\\*",
  "+": "\\+",
  ",": "\\x2c",
  ".": "\\.",
  ":": "\\x3a",
  ";": "\\x3b",
  "<": "\\x3c",
  "=": "\\x3d",
  ">": "\\x3e",
  "?": "\\?",
  "@": "\\x40",
  "^": "\\^",
  "`": "\\x60",
  "~": "\\x7e",
  "(": "\\(",
  ")": "\\)",
  "[": "\\[",
  "]": "\\]",
  "{": "\\{",
  "}": "\\}",
  "/": "\\/",
  "-": "\\x2d",
  "\\": "\\\\",
  "|": "\\|",
};

// Global, unicode-mode regexp matching any single reserved character. The
// character class is assembled from the *values* of `reservedCharMap` (the
// already-escaped forms), joined in the map's own order.
const RX_REGEXP_ESCAPE = new RegExp(
  "[" + Object.values(reservedCharMap).join("") + "]",
  "gu",
);

/**
 * Escapes arbitrary text for interpolation into a regexp, such that it will
 * match exactly that text and nothing else.
 *
 * Each character matched by `RX_REGEXP_ESCAPE` is replaced with the escape
 * sequence stored for it in `reservedCharMap`; every other character is left
 * untouched.
 *
 * @example
 * ```ts
 * import { escape } from "https://deno.land/std@$STD_VERSION/regexp/mod.ts";
 * import { assertEquals, assertMatch, assertNotMatch } from "https://deno.land/std@$STD_VERSION/testing/asserts.ts";
 *
 * const re = new RegExp(`^${escape(".")}$`, "u");
 *
 * assertEquals(re.source, "^\\.$");
 * assertMatch(".", re);
 * assertNotMatch("a", re);
 * ```
 *
 * @param str The text to escape.
 * @returns A copy of `str` in which every reserved character has been
 * replaced by its escape sequence.
 */
export function escape(str: string): string {
  return str.replaceAll(
    RX_REGEXP_ESCAPE,
    // The regexp only matches keys of `reservedCharMap`, so the lookup
    // always yields a string.
    (reserved) => reservedCharMap[reserved as keyof typeof reservedCharMap],
  );
}
100 changes: 100 additions & 0 deletions regexp/escape_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

import { escape } from "./escape.ts";
import {
assertEquals,
assertMatch,
assertNotMatch,
} from "../testing/asserts.ts";

// Every ASCII character, in ascending order of code point (0x00 through 0x7F).
const ALL_ASCII = String.fromCharCode(
  ...Array.from({ length: 128 }, (_, codePoint) => codePoint),
);
// The regexp flags each ASCII character is exercised under, one per character.
const ALL_REGEXP_FLAGS = "gimsuy";

/**
 * Calls `visit` once for every pairing of an ASCII character with a regexp
 * flag, iterating characters in the outer loop and flags in the inner loop.
 */
function forEachAsciiCharAndFlag(
  visit: (char: string, flag: string) => void,
): void {
  for (const char of ALL_ASCII) {
    for (const flag of ALL_REGEXP_FLAGS) {
      visit(char, flag);
    }
  }
}

// Checks `escape` against a few hand-written examples and then against every
// ASCII character under each regexp flag.
Deno.test("regexp", async (t) => {
  await t.step("escape", async (t) => {
    await t.step("examples", async (t) => {
      await t.step("`.` matches literal `.`", () => {
        const pattern = new RegExp(`^${escape(".")}$`, "u");

        assertEquals("^\\.$", pattern.source);
        assertMatch(".", pattern);
        assertNotMatch("a", pattern);
      });

      await t.step("`$` matches literal `$`", () => {
        const pattern = new RegExp(`^${escape("$")}$`);

        assertMatch("$", pattern);
        assertNotMatch("", pattern);
      });

      await t.step("`*` matches literal `*`", () => {
        const pattern = new RegExp(`^${escape("a*")}$`);

        assertMatch("a*", pattern);
        assertNotMatch("", pattern);
        assertNotMatch("aaa", pattern);
      });

      await t.step("escapes work correctly within character class", () => {
        const members = ".$*+[](){}|\\<>";
        const pattern = new RegExp(`^[${escape(members)}]$`);

        // Each member of the class must match on its own.
        for (const member of members) {
          assertMatch(member, pattern);
        }

        assertNotMatch("a", pattern);
      });
    });

    await t.step("all ASCII", async (t) => {
      await t.step("interpolates without erroring", async (t) => {
        await t.step("outside character class", () => {
          // Only construction is checked here; a throw fails the step.
          forEachAsciiCharAndFlag((char, flag) => {
            new RegExp(escape(char), flag);
          });
        });

        await t.step("within character class", () => {
          forEachAsciiCharAndFlag((char, flag) => {
            new RegExp(`[${escape(char)}]`, flag);
          });
        });

        await t.step("matches self", () => {
          forEachAsciiCharAndFlag((char, flag) => {
            assertMatch(char, new RegExp(`^${escape(char)}$`, flag));
          });
        });

        await t.step("doesn't match any other chars", () => {
          forEachAsciiCharAndFlag((char, flag) => {
            // Case-insensitive matching is excluded from this check.
            if (flag === "i") return;

            for (const other of ALL_ASCII) {
              if (other === char) continue;

              assertNotMatch(
                other,
                new RegExp(`^${escape(char)}$`, flag),
              );
            }
          });
        });
      });
    });
  });
});
11 changes: 11 additions & 0 deletions regexp/mod.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.

/**
* Functions for tasks related to regular expressions (regexps), such as
* escaping text for interpolation into a regexp.
*
* @module
*/

export * from "./escape.ts";