Skip to content

Commit

Permalink
exclude empty pos tags in python, lint
Browse files: browse the repository at this point in the history
  • Loading branch information
bminixhofer committed Jan 13, 2021
1 parent 54ce8be commit e76bed4
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 7 deletions.
9 changes: 8 additions & 1 deletion bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,14 @@ impl PyToken {
.word
.tags
.iter()
.map(|x| self.tagger.id_to_tag(x.pos_id))
.filter_map(|x| {
let pos = self.tagger.id_to_tag(x.pos_id);
if pos.is_empty() {
None
} else {
Some(pos)
}
})
.collect();
tags.sort_unstable();
tags.dedup();
Expand Down
8 changes: 4 additions & 4 deletions nlprule/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ rayon-cond = "0.1.0"
rayon = "1.5"

clap = { version = "3.0.0-beta.1", optional = true }
env_logger = { version = "0.8.1", optional = true }
env_logger = { version = "0.8", optional = true }

serde-xml-rs = { git = "https://github.com/RReverser/serde-xml-rs/", optional = true }
serde-xml-rs = { version = "0.4", optional = true }
xml-rs = { version = "0.8.3", optional = true }
roxmltree = { version = "0.14.0", optional = true }
serde_json = { version = "1", optional = true }

[dev-dependencies]
quickcheck = "0.9"
quickcheck_macros = "0.9"
quickcheck = "1.0"
quickcheck_macros = "1.0"

[features]
compile = ["serde-xml-rs", "xml-rs", "roxmltree", "serde_json"]
Expand Down
2 changes: 1 addition & 1 deletion nlprule/src/rule/from_structure/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ fn parse_pos_filter(postag: &str, postag_regexp: Option<&str>, tagger: &Tagger)
tagger,
)),
Some(_) | None => POSFilter::new(PosMatcher::new(
Matcher::new_string(either::Left(postag.to_string().into()), false, false, true),
Matcher::new_string(either::Left(postag.into()), false, false, true),
tagger,
)),
}
Expand Down
2 changes: 1 addition & 1 deletion nlprule/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ fn get_token_strs(text: &str) -> Vec<&str> {

/// *Finalizes* the tokens by e.g. adding a specific UNKNOWN part-of-speech tag.
/// After finalization, grammatical error correction rules can be used on the tokens.
pub fn finalize<'t>(tokens: Vec<IncompleteToken<'t>>) -> Vec<Token<'t>> {
pub fn finalize(tokens: Vec<IncompleteToken>) -> Vec<Token> {
if tokens.is_empty() {
return Vec::new();
}
Expand Down

0 comments on commit e76bed4

Please sign in to comment.