Skip to content

Commit

Permalink
Storing mapping from names to group indices into Regex
Browse files Browse the repository at this point in the history
  • Loading branch information
defuz committed Feb 1, 2016
1 parent aae73b0 commit 4ad644f
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 63 deletions.
16 changes: 16 additions & 0 deletions regex_macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,19 @@ impl<'a> NfaGen<'a> {
None => cx.expr_none(self.sp),
}
);
let named_groups = {
let mut named_groups = ::std::collections::BTreeMap::new();
for (i, name) in self.names.iter().enumerate() {
if let Some(ref name) = *name {
named_groups.insert(name.to_owned(), i);
}
}
self.vec_expr(named_groups.iter(),
&mut |cx, (name, group_idx)|
quote_expr!(cx, ($name, $group_idx))
)
};

let prefix_anchor = self.prog.anchored_begin;

let step_insts = self.step_insts();
Expand All @@ -125,6 +138,8 @@ impl<'a> NfaGen<'a> {
// unused code generated by regex!. See #14185 for an example.
#[allow(dead_code)]
static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names;
#[allow(dead_code)]
static NAMED_GROUPS: &'static [(&'static str, usize)] = &$named_groups;

#[allow(dead_code)]
fn exec<'t>(
Expand Down Expand Up @@ -310,6 +325,7 @@ fn exec<'t>(
::regex::internal::Native(::regex::internal::ExNative {
original: $regex,
names: &CAP_NAMES,
groups: &NAMED_GROUPS,
prog: exec,
})
})
Expand Down
13 changes: 13 additions & 0 deletions src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.



use syntax;

use Error;
Expand Down Expand Up @@ -53,6 +55,9 @@ pub struct Program {
/// The sequence of capture group names. There is an entry for each capture
/// group index and a name exists only if the capture group is named.
pub cap_names: Vec<Option<String>>,
/// The map of named capture groups. The keys are group names and
/// the values are group indices.
pub named_groups: ::std::collections::HashMap<String, usize>,
/// If the regular expression requires a literal prefix in order to have a
/// match, that prefix is stored here as a DFA.
pub prefixes: Prefix,
Expand Down Expand Up @@ -84,10 +89,17 @@ impl Program {
let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
let create_threads = move || NfaThreads::new(insts_len, ncaps);
let create_backtrack = move || BackMachine::new();
let mut named_groups = ::std::collections::HashMap::new();
for (i, name) in cap_names.iter().enumerate() {
if let Some(ref name) = *name {
named_groups.insert(name.to_owned(), i);
}
}
let mut prog = Program {
original: re.into(),
insts: insts,
cap_names: cap_names,
named_groups: named_groups,
prefixes: Prefix::Empty,
prefixes_complete: false,
anchored_begin: false,
Expand Down Expand Up @@ -317,6 +329,7 @@ impl Clone for Program {
original: self.original.clone(),
insts: self.insts.clone(),
cap_names: self.cap_names.clone(),
named_groups: self.named_groups.clone(),
prefixes: self.prefixes.clone(),
prefixes_complete: self.prefixes_complete,
anchored_begin: self.anchored_begin,
Expand Down
125 changes: 62 additions & 63 deletions src/re.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
// except according to those terms.

use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::hash_map::Iter;
use std::fmt;
use std::ops::Index;
#[cfg(feature = "pattern")]
Expand Down Expand Up @@ -186,6 +184,8 @@ pub struct ExNative {
#[doc(hidden)]
pub names: &'static &'static [Option<&'static str>],
#[doc(hidden)]
pub groups: &'static &'static [(&'static str, usize)],
#[doc(hidden)]
pub prog: fn(&mut CaptureIdxs, &str, usize) -> bool,
}

Expand Down Expand Up @@ -416,10 +416,14 @@ impl Regex {
///
/// The `0`th capture group is always unnamed, so it must always be
/// accessed with `at(0)` or `[0]`.
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
let mut caps = self.alloc_captures();
if exec(self, &mut caps, text, 0) {
Some(Captures::new(self, text, caps))
pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option<Captures<'r, 't>> {
let mut locs = self.alloc_captures();
if exec(self, &mut locs, text, 0) {
Some(Captures {
regex: self,
text: text,
locs: locs,
})
} else {
None
}
Expand Down Expand Up @@ -823,37 +827,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
/// Positions returned from a capture group are always byte indices.
///
/// `'r` is the lifetime of the borrowed `Regex` that produced the match and
/// `'t` is the lifetime of the matched text.
pub struct Captures<'t> {
pub struct Captures<'r, 't> {
regex: &'r Regex,
text: &'t str,
locs: Vec<Option<usize>>,
named: Option<HashMap<String, usize>>,
}

impl<'t> Captures<'t> {
fn new(
re: &Regex,
search: &'t str,
locs: Vec<Option<usize>>,
) -> Captures<'t> {
let named =
if re.captures_len() == 0 {
None
} else {
let mut named = HashMap::new();
for (i, name) in re.capture_names().enumerate() {
if let Some(name) = name {
named.insert(name.to_owned(), i);
}
}
Some(named)
};
Captures {
text: search,
locs: locs,
named: named,
}
}

impl<'r, 't> Captures<'r, 't> {
/// Returns the start and end positions of the Nth capture group.
/// Returns `None` if `i` is not a valid capture group or if the capture
/// group did not match anything.
Expand Down Expand Up @@ -882,35 +862,50 @@ impl<'t> Captures<'t> {
/// `name` isn't a valid capture group or didn't match anything, then
/// `None` is returned.
pub fn name(&self, name: &str) -> Option<&'t str> {
match self.named {
None => None,
Some(ref h) => {
match h.get(name) {
None => None,
Some(i) => self.at(*i),
match *self.regex {
Regex::Native(ExNative { ref groups, .. }) => {
match groups.binary_search_by(|&(n, _)| n.cmp(name)) {
Ok(i) => self.at(groups[i].1),
Err(_) => None
}
}
},
Regex::Dynamic(Program { ref named_groups, .. }) => {
named_groups.get(name).and_then(|i| self.at(*i))
},
}
}

/// Creates an iterator of all the capture groups in order of appearance
/// in the regular expression.
pub fn iter(&'t self) -> SubCaptures<'t> {
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> {
SubCaptures { idx: 0, caps: self, }
}

/// Creates an iterator of all the capture group positions in order of
/// appearance in the regular expression. Positions are byte indices
/// in terms of the original string matched.
pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> {
SubCapturesPos { idx: 0, caps: self, }
}

/// Creates an iterator of all named groups as a tuple with the group
/// name and the value. The iterator returns these values in arbitrary
/// order.
pub fn iter_named(&'t self) -> SubCapturesNamed<'t> {
SubCapturesNamed { caps: self, inner: self.named.as_ref().map(|n| n.iter()) }
pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> {
let iter = match *self.regex {
Regex::Native(ExNative { ref groups, .. }) => {
Box::new(groups.iter().map(|&v| v))
as Box<Iterator<Item=(&'r str, usize)> + 'r>
},
Regex::Dynamic(Program { ref named_groups, .. }) => {
Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i)))
as Box<Iterator<Item=(&'r str, usize)> + 'r>
},
};
SubCapturesNamed {
caps: self,
inner: iter
}
}

/// Expands all instances of `$name` in `text` to the corresponding capture
Expand Down Expand Up @@ -953,7 +948,7 @@ impl<'t> Captures<'t> {
///
/// # Panics
/// If there is no group at the given index.
impl<'t> Index<usize> for Captures<'t> {
impl<'r, 't> Index<usize> for Captures<'r, 't> {

type Output = str;

Expand All @@ -967,7 +962,7 @@ impl<'t> Index<usize> for Captures<'t> {
///
/// # Panics
/// If there is no group named by the given value.
impl<'t> Index<&'t str> for Captures<'t> {
impl<'r, 't> Index<&'t str> for Captures<'r, 't> {

type Output = str;

Expand All @@ -984,12 +979,12 @@ impl<'t> Index<&'t str> for Captures<'t> {
/// expression.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCaptures<'t> {
pub struct SubCaptures<'c, 'r: 'c, 't: 'c> {
idx: usize,
caps: &'t Captures<'t>,
caps: &'c Captures<'r, 't>,
}

impl<'t> Iterator for SubCaptures<'t> {
impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
type Item = Option<&'t str>;

fn next(&mut self) -> Option<Option<&'t str>> {
Expand All @@ -1008,12 +1003,12 @@ impl<'t> Iterator for SubCaptures<'t> {
/// Positions are byte indices in terms of the original string matched.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCapturesPos<'t> {
pub struct SubCapturesPos<'c, 'r: 'c, 't: 'c> {
idx: usize,
caps: &'t Captures<'t>,
caps: &'c Captures<'r, 't>,
}

impl<'t> Iterator for SubCapturesPos<'t> {
impl<'c, 'r, 't> Iterator for SubCapturesPos<'c, 'r, 't> {
type Item = Option<(usize, usize)>;

fn next(&mut self) -> Option<Option<(usize, usize)>> {
Expand All @@ -1030,17 +1025,17 @@ impl<'t> Iterator for SubCapturesPos<'t> {
/// name and the value.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCapturesNamed<'t>{
caps: &'t Captures<'t>,
inner: Option<Iter<'t, String, usize>>,
pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> {
caps: &'c Captures<'r, 't>,
inner: Box<Iterator<Item=(&'r str, usize)> + 'r>,
}

impl<'t> Iterator for SubCapturesNamed<'t> {
type Item = (&'t str, Option<&'t str>);
impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> {
type Item = (&'r str, Option<&'t str>);

fn next(&mut self) -> Option<(&'t str, Option<&'t str>)> {
match self.inner.as_mut().map_or(None, |it| it.next()) {
Some((name, pos)) => Some((name, self.caps.at(*pos))),
fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> {
match self.inner.next() {
Some((name, pos)) => Some((name, self.caps.at(pos))),
None => None
}
}
Expand All @@ -1061,9 +1056,9 @@ pub struct FindCaptures<'r, 't> {
}

impl<'r, 't> Iterator for FindCaptures<'r, 't> {
type Item = Captures<'t>;
type Item = Captures<'r, 't>;

fn next(&mut self) -> Option<Captures<'t>> {
fn next(&mut self) -> Option<Captures<'r, 't>> {
if self.last_end > self.search.len() {
return None
}
Expand All @@ -1087,7 +1082,11 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
self.last_end = e;
self.skip_next_empty = true;
}
Some(Captures::new(self.re, self.search, caps))
Some(Captures {
regex: self.re,
text: self.search,
locs: caps
})
}
}

Expand Down

0 comments on commit 4ad644f

Please sign in to comment.