From 75ba6e725cf2e77df49707b5e7612aac2df1ec82 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Wed, 23 Oct 2024 14:04:43 +0900 Subject: [PATCH 1/2] =?UTF-8?q?[fix]=20=E5=AE=9A=E7=BE=A9=E6=B8=88?= =?UTF-8?q?=E3=81=BF=E3=83=88=E3=83=BC=E3=82=AF=E3=83=B3=E5=88=97=E3=81=AE?= =?UTF-8?q?=E5=84=AA=E5=85=88=E5=BA=A6=E3=82=92=E4=BF=9D=E6=8C=81=E3=81=97?= =?UTF-8?q?=E3=81=A6=E5=AD=97=E5=8F=A5=E8=A7=A3=E6=9E=90=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3(RegexLex)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/lex_regex/src/lib.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 2e22254..7b0e67a 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -45,24 +45,23 @@ impl LexDriver for RegexLexer { }; // Find the token - let mut matches = self + let matched = self .regex_set .matches(remain) .into_iter() .map(|idx| &self.regex_map[idx]) .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) - .collect::>(); - matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); + .next(); // Update pos - let (token, acc_s) = match matches.first() { + let (token, acc_s) = match matched { Some(a) => a, None => return, }; let range = (pos, pos + acc_s.len()); pos += acc_s.len(); - yield Token::new(*token, &input, range); + yield Token::new(token, &input, range); } } } From 8d8f8cb024eb34556c9f50961f895e83972d1c4e Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Wed, 23 Oct 2024 14:08:46 +0900 Subject: [PATCH 2/2] =?UTF-8?q?[fix]=20RegexLexer=20=E3=81=AE=E5=90=84?= =?UTF-8?q?=E3=83=95=E3=82=A3=E3=83=BC=E3=83=AB=E3=83=89=E3=82=92=20Rc=20?= =?UTF-8?q?=E3=81=A7=E5=8C=85=E3=82=93=E3=81=A7=E3=81=84=E3=81=9F=E3=81=AE?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/lex_regex/src/lib.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 7b0e67a..75137c2 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -1,7 +1,5 @@ #![feature(gen_blocks)] -use std::rc::Rc; - use regex::{Regex, RegexSet}; use copager_cfg::token::{TokenTag, Token}; @@ -9,9 +7,9 @@ use copager_lex::{LexSource, LexDriver}; #[derive(Debug)] pub struct RegexLexer { - regex_istr: Rc, - regex_set: Rc, - regex_map: Rc>, + regex_istr: Regex, + regex_set: RegexSet, + regex_map: Vec<(Regex, S::Tag)>, } impl LexDriver for RegexLexer { @@ -26,9 +24,9 @@ impl LexDriver for RegexLexer { .collect::>>()?; Ok(RegexLexer { - regex_istr: Rc::new(regex_istr), - regex_set: Rc::new(regex_set), - regex_map: Rc::new(regex_map), + regex_istr, + regex_set, + regex_map, }) }