Skip to content

Commit

Permalink
Fix rust-lang#168 and using Arc for named groups
Browse files Browse the repository at this point in the history
  • Loading branch information
defuz committed Feb 18, 2016
1 parent 4ad644f commit 422a4b4
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 64 deletions.
14 changes: 8 additions & 6 deletions src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.


use std::{char, cmp};
use std::collections::HashMap;
use std::sync::Arc;

use syntax;

Expand Down Expand Up @@ -57,7 +59,7 @@ pub struct Program {
pub cap_names: Vec<Option<String>>,
/// The map of named capture groups. The keys are group names and
/// the values are group indices.
pub named_groups: ::std::collections::HashMap<String, usize>,
pub named_groups: Arc<HashMap<String, usize>>,
/// If the regular expression requires a literal prefix in order to have a
/// match, that prefix is stored here as a DFA.
pub prefixes: Prefix,
Expand Down Expand Up @@ -89,7 +91,7 @@ impl Program {
let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
let create_threads = move || NfaThreads::new(insts_len, ncaps);
let create_backtrack = move || BackMachine::new();
let mut named_groups = ::std::collections::HashMap::new();
let mut named_groups = HashMap::new();
for (i, name) in cap_names.iter().enumerate() {
if let Some(ref name) = *name {
named_groups.insert(name.to_owned(), i);
Expand All @@ -99,7 +101,7 @@ impl Program {
original: re.into(),
insts: insts,
cap_names: cap_names,
named_groups: named_groups,
named_groups: Arc::new(named_groups),
prefixes: Prefix::Empty,
prefixes_complete: false,
anchored_begin: false,
Expand Down Expand Up @@ -284,7 +286,7 @@ impl Program {
for c in (s as u32)..(e as u32 + 1){
for alt in &orig {
let mut alt = alt.clone();
alt.push(::std::char::from_u32(c).unwrap());
alt.push(char::from_u32(c).unwrap());
alts.push(alt);
}
}
Expand Down Expand Up @@ -346,7 +348,7 @@ fn num_captures(insts: &[Inst]) -> usize {
let mut n = 0;
for inst in insts {
if let Inst::Save(ref inst) = *inst {
n = ::std::cmp::max(n, inst.slot + 1)
n = cmp::max(n, inst.slot + 1)
}
}
// There are exactly 2 Save slots for every capture.
Expand Down
140 changes: 82 additions & 58 deletions src/re.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ use std::ops::Index;
#[cfg(feature = "pattern")]
use std::str::pattern::{Pattern, Searcher, SearchStep};
use std::str::FromStr;
use std::collections::HashMap;
use std::sync::Arc;

use program::{Program, MatchEngine};
use syntax;
Expand Down Expand Up @@ -416,13 +418,13 @@ impl Regex {
///
/// The `0`th capture group is always unnamed, so it must always be
/// accessed with `at(0)` or `[0]`.
pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option<Captures<'r, 't>> {
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
let mut locs = self.alloc_captures();
if exec(self, &mut locs, text, 0) {
Some(Captures {
regex: self,
text: text,
locs: locs,
named_groups: NamedGroups::from_regex(self)
})
} else {
None
Expand Down Expand Up @@ -816,6 +818,47 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
}
}

/// Lookup table mapping capture group names to capture group indices.
///
/// The table is self-contained (a `'static` slice or a reference-counted
/// map), so a value holding it needs no borrow of the originating `Regex`.
enum NamedGroups {
    /// `(name, index)` pairs held in a static slice. Lookups binary-search
    /// this slice by name, so it is expected to be sorted by name.
    Native(&'static [(&'static str, usize)]),
    /// Group names mapped to their indices in a shared, reference-counted
    /// hash map.
    Dynamic(Arc<HashMap<String, usize>>),
}

impl NamedGroups {
    /// Builds the name table for `regex`.
    ///
    /// A native regex contributes its static group slice. A dynamic regex
    /// contributes another handle to its shared `named_groups` map: only the
    /// `Arc` is cloned, the map itself is not copied.
    fn from_regex(regex: &Regex) -> NamedGroups {
        match *regex {
            Regex::Dynamic(Program { ref named_groups, .. }) => {
                NamedGroups::Dynamic(named_groups.clone())
            }
            Regex::Native(ExNative { ref groups, .. }) => {
                NamedGroups::Native(groups)
            }
        }
    }

    /// Returns the capture index registered for `name`, or `None` when no
    /// group has that name.
    fn pos(&self, name: &str) -> Option<usize> {
        match *self {
            NamedGroups::Dynamic(ref map) => map.get(name).cloned(),
            NamedGroups::Native(table) => {
                // Binary search by name; this relies on the static table
                // being sorted by name.
                match table.binary_search_by(|&(key, _)| key.cmp(name)) {
                    Ok(found) => Some(table[found].1),
                    Err(_) => None,
                }
            }
        }
    }

    /// Returns a boxed iterator over every `(name, index)` pair.
    ///
    /// Native tables are walked in slice order; dynamic tables are walked in
    /// the hash map's iteration order.
    fn iter<'n>(&'n self) -> Box<Iterator<Item=(&'n str, usize)> + 'n> {
        match *self {
            NamedGroups::Dynamic(ref map) => {
                Box::new(map.iter().map(|(key, idx)| (&key[..], *idx)))
                    as Box<Iterator<Item=(&'n str, usize)> + 'n>
            }
            NamedGroups::Native(table) => {
                Box::new(table.iter().map(|&pair| pair))
                    as Box<Iterator<Item=(&'n str, usize)> + 'n>
            }
        }
    }
}

/// Captures represents a group of captured strings for a single match.
///
/// The 0th capture always corresponds to the entire match. Each subsequent
Expand All @@ -827,13 +870,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
/// Positions returned from a capture group are always byte indices.
///
/// `'t` is the lifetime of the matched text.
pub struct Captures<'r, 't> {
regex: &'r Regex,
pub struct Captures<'t> {
text: &'t str,
locs: Vec<Option<usize>>,
named_groups: NamedGroups,
}

impl<'r, 't> Captures<'r, 't> {
impl<'t> Captures<'t> {
/// Returns the start and end positions of the Nth capture group.
/// Returns `None` if `i` is not a valid capture group or if the capture
/// group did not match anything.
Expand Down Expand Up @@ -862,49 +905,29 @@ impl<'r, 't> Captures<'r, 't> {
/// `name` isn't a valid capture group or didn't match anything, then
/// `None` is returned.
pub fn name(&self, name: &str) -> Option<&'t str> {
match *self.regex {
Regex::Native(ExNative { ref groups, .. }) => {
match groups.binary_search_by(|&(n, _)| n.cmp(name)) {
Ok(i) => self.at(groups[i].1),
Err(_) => None
}
},
Regex::Dynamic(Program { ref named_groups, .. }) => {
named_groups.get(name).and_then(|i| self.at(*i))
},
}
self.named_groups.pos(name).and_then(|i| self.at(i))
}

/// Creates an iterator of all the capture groups in order of appearance
/// in the regular expression.
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> {
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 't> {
SubCaptures { idx: 0, caps: self, }
}

/// Creates an iterator of all the capture group positions in order of
/// appearance in the regular expression. Positions are byte indices
/// in terms of the original string matched.
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> {
SubCapturesPos { idx: 0, caps: self, }
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c> {
SubCapturesPos { idx: 0, locs: &self.locs }
}

/// Creates an iterator of all named groups as a tuple with the group
/// name and the value. The iterator returns these values in arbitrary
/// order.
pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> {
let iter = match *self.regex {
Regex::Native(ExNative { ref groups, .. }) => {
Box::new(groups.iter().map(|&v| v))
as Box<Iterator<Item=(&'r str, usize)> + 'r>
},
Regex::Dynamic(Program { ref named_groups, .. }) => {
Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i)))
as Box<Iterator<Item=(&'r str, usize)> + 'r>
},
};
pub fn iter_named<'c: 't>(&'c self) -> SubCapturesNamed<'c, 't> {
SubCapturesNamed {
caps: self,
inner: iter
names: self.named_groups.iter()
}
}

Expand Down Expand Up @@ -948,7 +971,7 @@ impl<'r, 't> Captures<'r, 't> {
///
/// # Panics
/// If there is no group at the given index.
impl<'r, 't> Index<usize> for Captures<'r, 't> {
impl<'t> Index<usize> for Captures<'t> {

type Output = str;

Expand All @@ -962,7 +985,7 @@ impl<'r, 't> Index<usize> for Captures<'r, 't> {
///
/// # Panics
/// If there is no group named by the given value.
impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
impl<'t> Index<&'t str> for Captures<'t> {

type Output = str;

Expand All @@ -979,12 +1002,12 @@ impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
/// expression.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCaptures<'c, 'r: 'c, 't: 'c> {
pub struct SubCaptures<'c, 't: 'c> {
idx: usize,
caps: &'c Captures<'r, 't>,
caps: &'c Captures<'t>,
}

impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
impl<'c, 't> Iterator for SubCaptures<'c, 't> {
type Item = Option<&'t str>;

fn next(&mut self) -> Option<Option<&'t str>> {
Expand All @@ -1003,41 +1026,42 @@ impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
/// Positions are byte indices in terms of the original string matched.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCapturesPos<'c, 'r: 'c, 't: 'c> {
pub struct SubCapturesPos<'c> {
idx: usize,
caps: &'c Captures<'r, 't>,
locs: &'c [Option<usize>]
}

impl<'c, 'r, 't> Iterator for SubCapturesPos<'c, 'r, 't> {
impl<'c> Iterator for SubCapturesPos<'c> {
type Item = Option<(usize, usize)>;

fn next(&mut self) -> Option<Option<(usize, usize)>> {
if self.idx < self.caps.len() {
self.idx += 1;
Some(self.caps.pos(self.idx - 1))
} else {
None
if self.idx >= self.locs.len() {
return None
}
let r = match (self.locs[self.idx], self.locs[self.idx + 1]) {
(Some(s), Some(e)) => Some((s, e)),
(None, None) => None,
_ => unreachable!()
};
self.idx += 2;
Some(r)
}
}

/// An Iterator over named capture groups as a tuple with the group
/// name and the value.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> {
caps: &'c Captures<'r, 't>,
inner: Box<Iterator<Item=(&'r str, usize)> + 'r>,
pub struct SubCapturesNamed<'c, 't: 'c> {
caps: &'c Captures<'t>,
names: Box<Iterator<Item=(&'c str, usize)> + 'c>,
}

impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> {
type Item = (&'r str, Option<&'t str>);
impl<'c, 't: 'c> Iterator for SubCapturesNamed<'c, 't> {
type Item = (&'c str, Option<&'t str>);

fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> {
match self.inner.next() {
Some((name, pos)) => Some((name, self.caps.at(pos))),
None => None
}
fn next(&mut self) -> Option<(&'c str, Option<&'t str>)> {
self.names.next().map(|(name, pos)| (name, self.caps.at(pos)))
}
}

Expand All @@ -1056,9 +1080,9 @@ pub struct FindCaptures<'r, 't> {
}

impl<'r, 't> Iterator for FindCaptures<'r, 't> {
type Item = Captures<'r, 't>;
type Item = Captures<'t>;

fn next(&mut self) -> Option<Captures<'r, 't>> {
fn next(&mut self) -> Option<Captures<'t>> {
if self.last_end > self.search.len() {
return None
}
Expand All @@ -1083,9 +1107,9 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
self.skip_next_empty = true;
}
Some(Captures {
regex: self.re,
text: self.search,
locs: caps
locs: caps,
named_groups: NamedGroups::from_regex(self.re),
})
}
}
Expand Down

0 comments on commit 422a4b4

Please sign in to comment.