Skip to content

Commit 5bfb235

Browse files
refactor(lsp): remove fancy-regex from route matching (#34813)
Replaces the fancy-regex dependency with a small handwritten matcher.

Co-authored-by: Nathan Whitaker <nathan@deno.com>
1 parent c7f97e5 commit 5bfb235

4 files changed

Lines changed: 168 additions & 43 deletions

File tree

Cargo.lock

Lines changed: 0 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,6 @@ dprint-plugin-jupyter = "=0.2.2"
354354
dprint-plugin-markdown = "=0.20.0"
355355
dprint-plugin-typescript = "=0.96.1"
356356
env_logger = "=0.11.6"
357-
fancy-regex = "=0.14.0"
358357
imara-diff = "=0.2.0"
359358
libsui = "0.12.6"
360359
malva = "=0.15.2"

cli/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ dprint-plugin-jupyter.workspace = true
114114
dprint-plugin-markdown.workspace = true
115115
dprint-plugin-typescript.workspace = true
116116
esbuild_client = { version = "0.7.1", features = ["serde"] }
117-
fancy-regex.workspace = true
118117
faster-hex.workspace = true
119118
# If you disable the default __vendored_zlib_ng feature above, you _must_ be able to link against `-lz`.
120119
flate2.workspace = true

cli/lsp/path_to_regex.rs

Lines changed: 168 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ use std::iter::Peekable;
3333

3434
use deno_core::anyhow::anyhow;
3535
use deno_core::error::AnyError;
36-
use fancy_regex::Regex as FancyRegex;
3736
use once_cell::sync::Lazy;
3837
use regex::Regex;
38+
use regex::RegexBuilder;
3939

4040
static ESCAPE_STRING_RE: Lazy<Regex> =
4141
lazy_regex::lazy_regex!(r"([.+*?=^!:${}()\[\]|/\\])");
@@ -366,6 +366,108 @@ pub struct PathToRegexOptions {
366366
token_to_regex_options: Option<TokensToRegexOptions>,
367367
}
368368

369+
/// Test-only description of where a route pattern matched inside a path.
///
/// Holds the searched path together with the byte range of the match so the
/// tests can inspect both the matched text and its offsets.
#[cfg(test)]
#[derive(Debug)]
struct RouteMatch<'a> {
  /// The complete path that was searched.
  path: &'a str,
  /// Byte offset at which the match begins.
  start: usize,
  /// Byte offset one past the last matched byte.
  end: usize,
}

#[cfg(test)]
impl<'a> RouteMatch<'a> {
  /// Returns the portion of the path covered by the match.
  fn as_str(&self) -> &'a str {
    let Self { path, start, end } = *self;
    &path[start..end]
  }

  /// Returns the byte offset at which the match begins.
  fn start(&self) -> usize {
    let &Self { start, .. } = self;
    start
  }

  /// Returns the byte offset at which the match stops (exclusive).
  fn end(&self) -> usize {
    let &Self { end, .. } = self;
    end
  }
}
391+
392+
/// A check applied to the text that follows a regex match.
///
/// A match is accepted only when it stops at the end of the path (if
/// `allow_end` is set) or is immediately followed by one of the `allowed`
/// characters.
#[derive(Debug, Clone)]
struct Boundary {
  /// Characters that may immediately follow an accepted match.
  allowed: String,
  /// Whether a match that stops exactly at the end of the path is accepted.
  allow_end: bool,
  /// Characters that may be dropped from the end of a match when the match
  /// as found does not sit on a valid boundary.
  ///
  /// NOTE(review): the trim is applied to whatever character the match ends
  /// with, not specifically to an optional trailing delimiter. Confirm that
  /// callers only populate this when the pattern ends with such a delimiter.
  trim_trailing: String,
}

impl Boundary {
  /// Returns the accepted end offset for a match ending at byte offset `end`.
  ///
  /// The match is accepted unchanged when `end` is already a valid boundary.
  /// Otherwise, if the last character before `end` is one of
  /// `trim_trailing` and the offset just before that character is a valid
  /// boundary, the shortened offset is returned. Returns `None` when neither
  /// position qualifies, or when `end` is past the end of `path` or does not
  /// fall on a character boundary.
  fn checked_end(&self, path: &str, end: usize) -> Option<usize> {
    if self.matches(path, end) {
      return Some(end);
    }
    // `get` rejects out-of-range and mid-character offsets instead of
    // panicking the way direct slicing would.
    let (trimmed_end, c) = path.get(..end)?.char_indices().next_back()?;
    (self.trim_trailing.contains(c) && self.matches(path, trimmed_end))
      .then_some(trimmed_end)
  }

  /// Reports whether byte offset `end` of `path` is a valid boundary.
  ///
  /// The end of the path is valid only when `allow_end` is set; any other
  /// offset is valid when the character starting there is in `allowed`.
  /// Offsets that are out of range or not on a character boundary are never
  /// valid.
  fn matches(&self, path: &str, end: usize) -> bool {
    if end == path.len() {
      return self.allow_end;
    }
    path
      .get(end..)
      .and_then(|rest| rest.chars().next())
      .is_some_and(|c| self.allowed.contains(c))
  }
}
422+
423+
#[derive(Debug)]
424+
pub struct RouteRegex {
425+
re: Regex,
426+
boundary: Option<Boundary>,
427+
}
428+
429+
impl RouteRegex {
430+
fn new(
431+
pattern: &str,
432+
sensitive: bool,
433+
boundary: Option<Boundary>,
434+
) -> Result<Self, AnyError> {
435+
Ok(Self {
436+
re: RegexBuilder::new(pattern)
437+
.case_insensitive(!sensitive)
438+
.build()?,
439+
boundary,
440+
})
441+
}
442+
443+
#[cfg(test)]
444+
fn find<'a>(&self, path: &'a str) -> Option<RouteMatch<'a>> {
445+
self.re.find_iter(path).find_map(|m| {
446+
self
447+
.boundary
448+
.as_ref()
449+
.map_or(Some(m.end()), |boundary| {
450+
boundary.checked_end(path, m.end())
451+
})
452+
.map(|end| RouteMatch {
453+
path,
454+
start: m.start(),
455+
end,
456+
})
457+
})
458+
}
459+
460+
fn captures<'a>(&self, path: &'a str) -> Option<regex::Captures<'a>> {
461+
self.re.captures_iter(path).find(|caps| {
462+
let m = caps.get(0).unwrap();
463+
self
464+
.boundary
465+
.as_ref()
466+
.is_none_or(|boundary| boundary.checked_end(path, m.end()).is_some())
467+
})
468+
}
469+
}
470+
369471
fn try_consume(
370472
token_type: &TokenType,
371473
it: &mut Peekable<impl Iterator<Item = LexToken>>,
@@ -536,7 +638,7 @@ pub fn parse(
536638
pub fn tokens_to_regex(
537639
tokens: &[Token],
538640
maybe_options: Option<TokensToRegexOptions>,
539-
) -> Result<(FancyRegex, Option<Vec<Key>>), AnyError> {
641+
) -> Result<(RouteRegex, Option<Vec<Key>>), AnyError> {
540642
let TokensToRegexOptions {
541643
sensitive,
542644
strict,
@@ -546,9 +648,9 @@ pub fn tokens_to_regex(
546648
ends_with,
547649
} = maybe_options.unwrap_or_default();
548650
let has_ends_with = ends_with.is_some();
549-
let ends_with = format!(r"[{}]|$", ends_with.unwrap_or_default());
550-
let delimiter =
551-
format!(r"[{}]", delimiter.unwrap_or_else(|| "/#?".to_string()));
651+
let ends_with = ends_with.unwrap_or_default();
652+
let delimiter = delimiter.unwrap_or_else(|| "/#?".to_string());
653+
let delimiter_pattern = format!(r"[{}]", escape_string(&delimiter));
552654
let mut route = if start {
553655
"^".to_string()
554656
} else {
@@ -619,13 +721,22 @@ pub fn tokens_to_regex(
619721

620722
if end {
621723
if !strict {
622-
write!(route, r"{delimiter}?").unwrap();
724+
write!(route, r"{delimiter_pattern}?").unwrap();
623725
}
624-
if has_ends_with {
625-
write!(route, r"(?={ends_with})").unwrap();
726+
let boundary = if has_ends_with {
727+
Some(Boundary {
728+
allowed: ends_with,
729+
allow_end: true,
730+
trim_trailing: delimiter,
731+
})
626732
} else {
627733
route.push('$');
628-
}
734+
None
735+
};
736+
let re = RouteRegex::new(&route, sensitive, boundary)?;
737+
let maybe_keys = if keys.is_empty() { None } else { Some(keys) };
738+
739+
Ok((re, maybe_keys))
629740
} else {
630741
let is_end_delimited = match maybe_end_token {
631742
Some(Token::String(mut s)) => {
@@ -639,28 +750,30 @@ pub fn tokens_to_regex(
639750
None => true,
640751
};
641752

642-
if !strict {
643-
write!(route, r"(?:{delimiter}(?={ends_with}))?").unwrap();
644-
}
753+
let mut allowed = delimiter;
754+
allowed.push_str(&ends_with);
755+
let boundary = if !is_end_delimited {
756+
Some(Boundary {
757+
allowed,
758+
allow_end: true,
759+
trim_trailing: String::new(),
760+
})
761+
} else {
762+
None
763+
};
764+
let re = RouteRegex::new(&route, sensitive, boundary)?;
765+
let maybe_keys = if keys.is_empty() { None } else { Some(keys) };
645766

646-
if !is_end_delimited {
647-
write!(route, r"(?={delimiter}|{ends_with})").unwrap();
648-
}
767+
Ok((re, maybe_keys))
649768
}
650-
651-
let flags = if sensitive { "" } else { "(?i)" };
652-
let re = FancyRegex::new(&format!("{flags}{route}"))?;
653-
let maybe_keys = if keys.is_empty() { None } else { Some(keys) };
654-
655-
Ok((re, maybe_keys))
656769
}
657770

658771
/// Convert a path-like string into a regular expression, returning the regular
659772
/// expression and optionally any keys that can be matched in the string.
660773
pub fn string_to_regex(
661774
path: &str,
662775
maybe_options: Option<PathToRegexOptions>,
663-
) -> Result<(FancyRegex, Option<Vec<Key>>), AnyError> {
776+
) -> Result<(RouteRegex, Option<Vec<Key>>), AnyError> {
664777
let (parse_options, tokens_to_regex_options) =
665778
if let Some(options) = maybe_options {
666779
(options.parse_options, options.token_to_regex_options)
@@ -807,7 +920,7 @@ impl MatchResult {
807920
#[derive(Debug)]
808921
pub struct Matcher {
809922
maybe_keys: Option<Vec<Key>>,
810-
re: FancyRegex,
923+
re: RouteRegex,
811924
}
812925

813926
impl Matcher {
@@ -821,7 +934,7 @@ impl Matcher {
821934

822935
/// Match a string path, optionally returning the match result.
823936
pub fn matches(&self, path: &str) -> Option<MatchResult> {
824-
let caps = self.re.captures(path).ok()??;
937+
let caps = self.re.captures(path)?;
825938
let mut params = HashMap::new();
826939
if let Some(keys) = &self.maybe_keys {
827940
for (i, key) in keys.iter().enumerate() {
@@ -868,11 +981,7 @@ mod tests {
868981
let (re, _) = result.unwrap();
869982
for (fixture, expected) in fixtures {
870983
let result = re.find(fixture);
871-
assert!(
872-
result.is_ok(),
873-
"Find failure for path \"{path}\" and fixture \"{fixture}\""
874-
);
875-
let actual = result.unwrap();
984+
let actual = result;
876985
if let Some((text, start, end)) = *expected {
877986
assert!(
878987
actual.is_some(),
@@ -994,5 +1103,35 @@ mod tests {
9941103
}),
9951104
&[("/TEST", Some(("/TEST", 0, 5))), ("/test", None)],
9961105
);
1106+
test_path(
1107+
"/test",
1108+
Some(PathToRegexOptions {
1109+
parse_options: None,
1110+
token_to_regex_options: Some(TokensToRegexOptions {
1111+
end: false,
1112+
..Default::default()
1113+
}),
1114+
}),
1115+
&[
1116+
("/test/route", Some(("/test", 0, 5))),
1117+
("/testing", None),
1118+
("/test", Some(("/test", 0, 5))),
1119+
],
1120+
);
1121+
test_path(
1122+
"/test",
1123+
Some(PathToRegexOptions {
1124+
parse_options: None,
1125+
token_to_regex_options: Some(TokensToRegexOptions {
1126+
ends_with: Some("#?".to_string()),
1127+
..Default::default()
1128+
}),
1129+
}),
1130+
&[
1131+
("/test#section", Some(("/test", 0, 5))),
1132+
("/test?query", Some(("/test", 0, 5))),
1133+
("/test/route", None),
1134+
],
1135+
);
9971136
}
9981137
}

0 commit comments

Comments
 (0)