Skip to content

Commit

Permalink
remove unneccesary vec allocations
Browse files Browse the repository at this point in the history
  • Loading branch information
bminixhofer committed Jan 14, 2021
1 parent fd894d3 commit af53af1
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 24 deletions.
44 changes: 21 additions & 23 deletions nlprule/src/rule/engine/composition.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
tokenizer::tag::Tagger,
types::{Token, WordId},
types::{Token, WordData, WordId},
utils::{parallelism::MaybeParallelIterator, regex::SerializeRegex},
};
use enum_dispatch::enum_dispatch;
Expand Down Expand Up @@ -197,12 +197,15 @@ impl WordDataMatcher {

pub fn is_match(
&self,
input: &[(u16, &WordId)],
input: &[WordData],
graph: &MatchGraph,
case_sensitive: Option<bool>,
) -> bool {
input.iter().any(|x| {
let pos_matches = self.pos_matcher.as_ref().map_or(true, |m| m.is_match(x.0));
let pos_matches = self
.pos_matcher
.as_ref()
.map_or(true, |m| m.is_match(x.pos_id));

// matching part-of-speech tag is faster than inflection, so check POS first and early exit if it doesn't match
if !pos_matches {
Expand All @@ -212,7 +215,7 @@ impl WordDataMatcher {
let inflect_matches = self
.inflect_matcher
.as_ref()
.map_or(true, |m| m.is_match(x.1, graph, case_sensitive));
.map_or(true, |m| m.is_match(&x.lemma, graph, case_sensitive));

inflect_matches
})
Expand Down Expand Up @@ -319,14 +322,8 @@ pub mod concrete {
fn is_match(&self, input: &[&Token], graph: &MatchGraph, position: usize) -> bool {
let tags = &input[position].word.tags;

self.matcher.is_match(
&tags
.iter()
.map(|x| (x.pos_id, &x.lemma))
.collect::<Vec<_>>(),
graph,
Some(self.case_sensitive),
)
self.matcher
.is_match(&tags, graph, Some(self.case_sensitive))
}
}

Expand Down Expand Up @@ -532,7 +529,7 @@ impl Group {
pub struct MatchGraph<'t> {
groups: Vec<Group>,
id_to_idx: &'t FnvHashMap<usize, usize>,
tokens: Vec<&'t Token<'t>>,
tokens: &'t [&'t Token<'t>],
}

lazy_static! {
Expand All @@ -544,7 +541,7 @@ impl<'t> Default for MatchGraph<'t> {
MatchGraph {
groups: Vec::new(),
id_to_idx: &(*EMPTY_MAP),
tokens: Vec::new(),
tokens: &[],
}
}
}
Expand All @@ -553,7 +550,7 @@ impl<'t> MatchGraph<'t> {
pub fn new(
groups: Vec<Group>,
id_to_idx: &'t FnvHashMap<usize, usize>,
tokens: Vec<&'t Token<'t>>,
tokens: &'t [&'t Token<'t>],
) -> Self {
MatchGraph {
groups,
Expand Down Expand Up @@ -683,9 +680,9 @@ impl Composition {
}
}

fn next_can_match(
fn next_can_match<'t>(
&self,
tokens: &[&Token],
tokens: &'t [&'t Token<'t>],
graph: &MatchGraph,
position: usize,
index: usize,
Expand All @@ -694,10 +691,7 @@ impl Composition {
return false;
}

let next_required_pos = match self.parts[index + 1..]
.iter()
.position(|x| x.quantifier.min > 0)
{
let next_required_pos = match self.can_stop_mask[index + 1..].iter().position(|x| !x) {
Some(pos) => index + 1 + pos + 1,
None => self.parts.len(),
};
Expand All @@ -707,7 +701,11 @@ impl Composition {
.any(|x| x.atom.is_match(tokens, graph, position))
}

pub fn apply<'t>(&'t self, tokens: &[&'t Token<'t>], start: usize) -> Option<MatchGraph<'t>> {
pub fn apply<'t>(
&'t self,
tokens: &'t [&'t Token<'t>],
start: usize,
) -> Option<MatchGraph<'t>> {
// this path is extremely hot so more optimizations are done

// the first matcher can never rely on the match graph, so we use an empty default graph for the first match
Expand All @@ -729,7 +727,7 @@ impl Composition {
let mut graph = MatchGraph::new(
vec![Group::default(); self.parts.len() + 1],
&self.group_ids_to_idx,
tokens.to_vec(),
tokens,
);

let mut is_match = loop {
Expand Down
2 changes: 1 addition & 1 deletion nlprule/src/rule/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ impl Engine {
}
}

MatchGraph::new(groups, id_to_idx, tokens.to_vec())
MatchGraph::new(groups, id_to_idx, tokens)
}));
}
}
Expand Down

0 comments on commit af53af1

Please sign in to comment.