Skip to content

Commit

Permalink
feat(regex): Add regexEscape function
Browse files Browse the repository at this point in the history
  • Loading branch information
lionel-rowe committed Apr 27, 2023
1 parent 934cb11 commit 4077a39
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 0 deletions.
11 changes: 11 additions & 0 deletions regex/mod.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.

/**
* Functions for regex-related tasks, such as escaping text for interpolation
* into regexes.
*
* @module
*/

export * from "./regex_escape.ts";
83 changes: 83 additions & 0 deletions regex/regex_escape.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

// // For forward compatibility with the regex `v` flag, reservedCharMap is
// // generated from the ClassSetReservedDoublePunctuator,
// // ClassSetSyntaxCharacter, and ClassSetReservedPunctuator productions in the
// // draft spec.
// // See https://github.com/tc39/proposal-regexp-v-flag#how-is-the-v-flag-different-from-the-u-flag
// // and https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetReservedDoublePunctuator
// const reservedChars = [...new Set(['ClassSetReservedDoublePunctuator', 'ClassSetSyntaxCharacter', 'ClassSetReservedPunctuator'].map(n =>
// document.querySelector(`[name=${n}] emu-rhs`).textContent.replaceAll(/\s/g, '')
// ).join(''))]
// const reservedCharMap = Object.fromEntries(reservedChars
// .map(x => {
// try {
// for (const flag of 'gimsuy') {
// new RegExp(`\\${x}`, flag)
// new RegExp(`[\\${x}]`, flag)
// }
// return [x, `\\${x}`]
// } catch (e) {
// return [x, `\\x${x.codePointAt(0).toString(16).padStart(2, '0')}`]
// }
// }))

// Lookup table from each reserved character to the escape sequence that
// replaces it. A value is either a backslash-prefixed form of the character
// itself or a `\xHH` hex escape; the commented-out generator above falls back
// to the hex form whenever `\<char>` fails to compile under one of the flags
// it tries.
const reservedCharMap = {
  "&": "\\x26",
  "!": "\\x21",
  "#": "\\x23",
  "$": "\\$",
  "%": "\\x25",
  "*": "\\*",
  "+": "\\+",
  ",": "\\x2c",
  ".": "\\.",
  ":": "\\x3a",
  ";": "\\x3b",
  "<": "\\x3c",
  "=": "\\x3d",
  ">": "\\x3e",
  "?": "\\?",
  "@": "\\x40",
  "^": "\\^",
  "`": "\\x60",
  "~": "\\x7e",
  "(": "\\(",
  ")": "\\)",
  "[": "\\[",
  "]": "\\]",
  "{": "\\{",
  "}": "\\}",
  "/": "\\/",
  "-": "\\x2d",
  "\\": "\\\\",
  "|": "\\|",
};

// Global, unicode-mode character class that matches any single reserved
// character. The class body is assembled from the escaped forms (the map's
// values), so every member is already safe to place between `[` and `]`.
const RX_REGEX_ESCAPE = new RegExp(
  "[" + Object.values(reservedCharMap).join("") + "]",
  "gu",
);

/**
 * Escapes arbitrary text so that it can be interpolated into a regex and
 * match exactly that text and nothing else.
 *
 * Every character matched by `RX_REGEX_ESCAPE` is swapped for its entry in
 * `reservedCharMap`; all other characters pass through unchanged.
 *
 * @example
 * ```ts
 * import { regexEscape } from "https://deno.land/std@$STD_VERSION/regex/regex_escape.ts";
 * import { assertEquals, assertMatch, assertNotMatch } from "https://deno.land/std@$STD_VERSION/testing/asserts.ts";
 *
 * const re = new RegExp(`^${regexEscape(".")}$`, "u");
 *
 * assertEquals(re.source, "^\\.$");
 * assertMatch(".", re);
 * assertNotMatch("a", re);
 * ```
 *
 * @param str The text to escape.
 * @returns The text with each reserved character replaced by its escape.
 */
export function regexEscape(str: string) {
  // Looks up the escape sequence for one matched reserved character.
  const toEscapeSequence = (char: string) =>
    reservedCharMap[char as keyof typeof reservedCharMap];

  // `RX_REGEX_ESCAPE` has the `g` flag, so `replace` rewrites every match.
  return str.replace(RX_REGEX_ESCAPE, toEscapeSequence);
}
95 changes: 95 additions & 0 deletions regex/regex_escape_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

import { regexEscape } from "./regex_escape.ts";
import {
assertEquals,
assertMatch,
assertNotMatch,
} from "../testing/asserts.ts";

// Every ASCII character from U+0000 through U+007F, in ascending order.
const ALL_ASCII = String.fromCharCode(
  ...Array.from({ length: 0x80 }, (_, charCode) => charCode),
);
// The RegExp flags exercised by the tests, one character per flag.
const ALL_REGEX_FLAGS = ["g", "i", "m", "s", "u", "y"].join("");

// Test suite for `regexEscape`: a handful of hand-picked examples, followed by
// exhaustive checks over every ASCII character and every flag listed in
// `ALL_REGEX_FLAGS`.
Deno.test("regexEscape", async (t) => {
  await t.step("examples", async (t) => {
    await t.step("`.` matches literal `.`", () => {
      const re = new RegExp(`^${regexEscape(".")}$`, "u");

      // `assertEquals` takes (actual, expected).
      assertEquals(re.source, "^\\.$");
      assertMatch(".", re);
      assertNotMatch("a", re);
    });
    await t.step("`$` matches literal `$`", () => {
      const re = new RegExp(`^${regexEscape("$")}$`);

      assertMatch("$", re);
      assertNotMatch("", re);
    });
    await t.step("`*` matches literal `*`", () => {
      const re = new RegExp(`^${regexEscape("a*")}$`);

      assertMatch("a*", re);
      assertNotMatch("", re);
      assertNotMatch("aaa", re);
    });
    await t.step("escapes work correctly within character class", () => {
      const re = new RegExp(`^[${regexEscape(".$*+[](){}|\\<>")}]$`);

      assertMatch(".", re);
      assertMatch("$", re);
      assertMatch("*", re);
      assertMatch("+", re);
      assertMatch("[", re);
      assertMatch("]", re);
      assertMatch("(", re);
      assertMatch(")", re);
      assertMatch("{", re);
      assertMatch("}", re);
      assertMatch("|", re);
      assertMatch("\\", re);
      assertMatch("<", re);
      assertMatch(">", re);

      assertNotMatch("a", re);
    });
  });
  await t.step("all ASCII", async (t) => {
    // These two steps only check that constructing the regex does not throw.
    await t.step("interpolates without erroring", async (t) => {
      await t.step("outside character class", () => {
        for (const char of ALL_ASCII) {
          for (const flag of ALL_REGEX_FLAGS) {
            new RegExp(regexEscape(char), flag);
          }
        }
      });
      await t.step("within character class", () => {
        for (const char of ALL_ASCII) {
          for (const flag of ALL_REGEX_FLAGS) {
            new RegExp(`[${regexEscape(char)}]`, flag);
          }
        }
      });
    });
    await t.step("matches self", () => {
      for (const char of ALL_ASCII) {
        for (const flag of ALL_REGEX_FLAGS) {
          assertMatch(char, new RegExp(`^${regexEscape(char)}$`, flag));
        }
      }
    });
    await t.step("doesn't match any other chars", () => {
      for (const char of ALL_ASCII) {
        for (const flag of ALL_REGEX_FLAGS) {
          // Under `i` a letter also matches its other-case counterpart, so
          // the "matches nothing else" property does not apply to that flag.
          if (flag === "i") continue;

          // Built once per (char, flag) pair. Every check below is expected
          // not to match, and a failed match leaves `lastIndex` at 0, so the
          // same instance can be reused even with the `g` and `y` flags.
          const re = new RegExp(`^${regexEscape(char)}$`, flag);

          for (const char2 of ALL_ASCII) {
            if (char2 === char) continue;

            assertNotMatch(char2, re);
          }
        }
      }
    });
  });
});

0 comments on commit 4077a39

Please sign in to comment.