Skip to content

Commit

Permalink
feat(parse): Make identifier symbols configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
epage committed Aug 7, 2019
1 parent e093135 commit 3419a8d
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
7 changes: 4 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ unicase = "1.1"
bstr = "0.2"
log = "0.4"
env_logger = "0.6"
unicode-segmentation = "1.3.0"

[dev-dependencies]
assert_fs = "0.10"
Expand Down
15 changes: 14 additions & 1 deletion src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub enum Case {
/// Collects the options used by `build` to construct a `Parser`.
pub struct ParserBuilder {
/// NOTE(review): not consumed in the visible part of `build`; presumably
/// controls whether hex-looking tokens are skipped — confirm against the parser.
ignore_hex: bool,
/// When `true`, `build` adds `\d` to the set of characters an identifier may contain.
include_digits: bool,
/// Extra symbols allowed inside identifiers. `build` splits this string into
/// graphemes, regex-escapes each one, and adds it as an alternative in the pattern.
include_chars: String,
}

impl ParserBuilder {
Expand All @@ -27,11 +28,22 @@ impl ParserBuilder {
self
}

/// Sets the extra symbols that may appear inside an identifier.
///
/// `chars` replaces any previously configured set (it is not appended).
/// `build` later treats each grapheme of the string as an additional allowed
/// identifier character. Returns `&mut Self` so calls can be chained.
pub fn include_chars(&mut self, chars: String) -> &mut Self {
self.include_chars = chars;
self
}

pub fn build(&self) -> Parser {
let mut pattern = r#"\b(\p{Alphabetic}|_|'"#.to_owned();
let mut pattern = r#"\b(\p{Alphabetic}"#.to_owned();
if self.include_digits {
pattern.push_str(r#"|\d"#);
}
for grapheme in
unicode_segmentation::UnicodeSegmentation::graphemes(self.include_chars.as_str(), true)
{
let escaped = regex::escape(&grapheme);
pattern.push_str(&format!("|{}", escaped));
}
pattern.push_str(r#")+\b"#);
let words_str = regex::Regex::new(&pattern).unwrap();
let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();
Expand All @@ -48,6 +60,7 @@ impl Default for ParserBuilder {
Self {
ignore_hex: true,
include_digits: true,
include_chars: "_'".to_owned(),
}
}
}
Expand Down

0 comments on commit 3419a8d

Please sign in to comment.