// =============================================================================
// File: regex/mod.ts (new file)
// =============================================================================
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.

/**
 * Functions for regex-related tasks, such as escaping text for interpolation
 * into regexes.
 *
 * @module
 */

// The only statement in regex/mod.ts is the re-export shown below. It is kept
// as a comment here because this listing places the files of the patch one
// after another, and `regexEscape` is already exported directly further down.
//
//   export * from "./regex_escape.ts";

// =============================================================================
// File: regex/regex_escape.ts (new file)
// =============================================================================
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

// For future forward-compatibility with the regex `v` flag, `reservedCharMap`
// is autogenerated from the ClassSetReservedDoublePunctuator,
// ClassSetSyntaxCharacter, and ClassSetReservedPunctuator categories in the
// draft spec.
// See https://github.com/tc39/proposal-regexp-v-flag#how-is-the-v-flag-different-from-the-u-flag
// and https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetReservedDoublePunctuator
//
// Generator script (run in the browser console on the spec page above):
//
// const reservedChars = [...new Set(['ClassSetReservedDoublePunctuator', 'ClassSetSyntaxCharacter', 'ClassSetReservedPunctuator'].map(n =>
//   document.querySelector(`[name=${n}] emu-rhs`).textContent.replaceAll(/\s/g, '')
// ).join(''))]
// const reservedCharMap = Object.fromEntries(reservedChars
//   .map(x => {
//     try {
//       for (const flag of 'gimsuy') {
//         new RegExp(`\\${x}`, flag)
//         new RegExp(`[\\${x}]`, flag)
//       }
//       return [x, `\\${x}`]
//     } catch (e) {
//       return [x, `\\x${x.codePointAt(0).toString(16).padStart(2, '0')}`]
//     }
//   }))

/**
 * Maps each reserved character to the escape sequence emitted for it.
 *
 * Per the generator script above, a character is written as a plain backslash
 * escape (`\$`) when that form compiles both inside and outside a character
 * class under every one of the `gimsuy` flags, and as a two-digit hex escape
 * (`\x26`) otherwise.
 */
const reservedCharMap: Readonly<Record<string, string>> = {
  "&": "\\x26",
  "!": "\\x21",
  "#": "\\x23",
  "$": "\\$",
  "%": "\\x25",
  "*": "\\*",
  "+": "\\+",
  ",": "\\x2c",
  ".": "\\.",
  ":": "\\x3a",
  ";": "\\x3b",
  "<": "\\x3c",
  "=": "\\x3d",
  ">": "\\x3e",
  "?": "\\?",
  "@": "\\x40",
  "^": "\\^",
  "`": "\\x60",
  "~": "\\x7e",
  "(": "\\(",
  ")": "\\)",
  "[": "\\[",
  "]": "\\]",
  "{": "\\{",
  "}": "\\}",
  "/": "\\/",
  "-": "\\x2d",
  "\\": "\\\\",
  "|": "\\|",
};

/**
 * Matches any single reserved character.
 *
 * The character class is assembled from the map's escaped *values* rather
 * than its raw keys, so every member is already a valid class atom under the
 * `u` flag and no further escaping is needed to build the pattern.
 */
const RX_REGEX_ESCAPE = new RegExp(
  `[${Object.values(reservedCharMap).join("")}]`,
  "gu",
);

/**
 * Escapes arbitrary text for interpolation into a regex, such that it will
 * match exactly that text and nothing else.
 *
 * @example
 * ```ts
 * import { regexEscape } from "https://deno.land/std@$STD_VERSION/regex/regex_escape.ts";
 * import { assertEquals, assertMatch, assertNotMatch } from "https://deno.land/std@$STD_VERSION/testing/asserts.ts";
 *
 * const re = new RegExp(`^${regexEscape(".")}$`, "u");
 *
 * assertEquals(re.source, "^\\.$");
 * assertMatch(".", re);
 * assertNotMatch("a", re);
 * ```
 *
 * @param str The literal text to escape.
 * @returns `str` with every reserved character replaced by its escape
 * sequence from `reservedCharMap`; all other characters are left untouched.
 */
export function regexEscape(str: string): string {
  // `replace` with a global regex substitutes every match, exactly like
  // `replaceAll`, without requiring an ES2021 runtime or lib.
  return str.replace(
    RX_REGEX_ESCAPE,
    // Every character the pattern can match is a key of the map; the fallback
    // only keeps the lookup total for the type checker.
    (match) => reservedCharMap[match] ?? match,
  );
}

// =============================================================================
// File: regex/regex_escape_test.ts (new file)
// =============================================================================
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import { regexEscape } from "./regex_escape.ts";
import {
  assertEquals,
  assertMatch,
  assertNotMatch,
} from "../testing/asserts.ts";

/** Every ASCII code point from U+0000 through U+007F, in ascending order. */
const ALL_ASCII =
  "\x00\x01\x02\x03\x04\x05\x06\x07\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F";

/** One character per regex flag exercised by the exhaustive ASCII steps. */
const ALL_REGEX_FLAGS = "gimsuy";

// Test suite for `regexEscape`: a few hand-picked examples followed by an
// exhaustive sweep over every ASCII character under each regex flag.
Deno.test("regexEscape", async (t) => {
  await t.step("examples", async (t) => {
    await t.step("`.` matches literal `.`", () => {
      const re = new RegExp(`^${regexEscape(".")}$`, "u");

      assertEquals(re.source, "^\\.$");
      assertMatch(".", re);
      assertNotMatch("a", re);
    });

    await t.step("`$` matches literal `$`", () => {
      const re = new RegExp(`^${regexEscape("$")}$`);

      assertMatch("$", re);
      assertNotMatch("", re);
    });

    await t.step("`*` matches literal `*`", () => {
      const re = new RegExp(`^${regexEscape("a*")}$`);

      assertMatch("a*", re);
      assertNotMatch("", re);
      assertNotMatch("aaa", re);
    });

    await t.step("escapes work correctly within character class", () => {
      const re = new RegExp(`^[${regexEscape(".$*+[](){}|\\<>")}]$`);

      assertMatch(".", re);
      assertMatch("$", re);
      assertMatch("*", re);
      assertMatch("+", re);
      assertMatch("[", re);
      assertMatch("]", re);
      assertMatch("(", re);
      assertMatch(")", re);
      assertMatch("{", re);
      assertMatch("}", re);
      assertMatch("|", re);
      assertMatch("\\", re);
      assertMatch("<", re);
      assertMatch(">", re);

      assertNotMatch("a", re);
    });
  });

  await t.step("all ASCII", async (t) => {
    // These two steps only check that constructing the regex does not throw.
    await t.step("interpolates without erroring", async (t) => {
      await t.step("outside character class", () => {
        for (const char of ALL_ASCII) {
          for (const flag of ALL_REGEX_FLAGS) {
            new RegExp(regexEscape(char), flag);
          }
        }
      });

      await t.step("within character class", () => {
        for (const char of ALL_ASCII) {
          for (const flag of ALL_REGEX_FLAGS) {
            new RegExp(`[${regexEscape(char)}]`, flag);
          }
        }
      });
    });

    await t.step("matches self", () => {
      for (const char of ALL_ASCII) {
        for (const flag of ALL_REGEX_FLAGS) {
          assertMatch(char, new RegExp(`^${regexEscape(char)}$`, flag));
        }
      }
    });

    await t.step("doesn't match any other chars", () => {
      for (const char of ALL_ASCII) {
        for (const flag of ALL_REGEX_FLAGS) {
          // Case-insensitive matching lets a letter match its other case, so
          // the "no other char matches" property does not hold under `i`.
          if (flag === "i") continue;

          // Built once per (char, flag). Reuse is safe even for `g`/`y`: a
          // failed match leaves `lastIndex` at 0, and a successful match
          // fails the assertion immediately.
          const re = new RegExp(`^${regexEscape(char)}$`, flag);

          for (const other of ALL_ASCII) {
            if (other === char) continue;

            assertNotMatch(other, re);
          }
        }
      }
    });
  });
});