Skip to content

Commit

Permalink
Merge pull request #13 from epage/fix
Browse files Browse the repository at this point in the history
 fix(dict): Fix should match typo's case
  • Loading branch information
epage committed Jun 26, 2019
2 parents 3d1fb3b + 953064e commit 5ef8153
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 87 deletions.
5 changes: 4 additions & 1 deletion benches/corrections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ fn load_corrections(b: &mut test::Bencher) {
fn correct_word_hit(b: &mut test::Bencher) {
let corrections = defenestrate::Dictionary::new();
let input = defenestrate::tokens::Word::new("successs", 0).unwrap();
assert_eq!(corrections.correct_word(input), Some("successes"));
assert_eq!(
corrections.correct_word(input),
Some(std::borrow::Cow::Borrowed("successes"))
);
b.iter(|| corrections.correct_word(input));
}

Expand Down
20 changes: 10 additions & 10 deletions benches/tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,60 +6,60 @@ mod data;

#[bench]
fn symbol_parse_empty(b: &mut test::Bencher) {
b.iter(|| defenestrate::tokens::Symbol::parse(data::EMPTY.as_bytes()).last());
b.iter(|| defenestrate::tokens::Identifier::parse(data::EMPTY.as_bytes()).last());
}

#[bench]
fn symbol_parse_no_tokens(b: &mut test::Bencher) {
b.iter(|| defenestrate::tokens::Symbol::parse(data::NO_TOKENS.as_bytes()).last());
b.iter(|| defenestrate::tokens::Identifier::parse(data::NO_TOKENS.as_bytes()).last());
}

#[bench]
fn symbol_parse_single_token(b: &mut test::Bencher) {
b.iter(|| {
defenestrate::tokens::Symbol::parse(data::SINGLE_TOKEN.as_bytes()).last();
defenestrate::tokens::Identifier::parse(data::SINGLE_TOKEN.as_bytes()).last();
});
}

#[bench]
fn symbol_parse_sherlock(b: &mut test::Bencher) {
b.iter(|| defenestrate::tokens::Symbol::parse(data::SHERLOCK.as_bytes()).last());
b.iter(|| defenestrate::tokens::Identifier::parse(data::SHERLOCK.as_bytes()).last());
}

#[bench]
fn symbol_parse_code(b: &mut test::Bencher) {
b.iter(|| defenestrate::tokens::Symbol::parse(data::CODE.as_bytes()).last());
b.iter(|| defenestrate::tokens::Identifier::parse(data::CODE.as_bytes()).last());
}

#[bench]
fn symbol_parse_corpus(b: &mut test::Bencher) {
b.iter(|| defenestrate::tokens::Symbol::parse(data::CORPUS.as_bytes()).last());
b.iter(|| defenestrate::tokens::Identifier::parse(data::CORPUS.as_bytes()).last());
}

#[bench]
fn symbol_split_lowercase_short(b: &mut test::Bencher) {
let input = "abcabcabcabc";
let symbol = defenestrate::tokens::Symbol::new(input, 0).unwrap();
let symbol = defenestrate::tokens::Identifier::new(input, 0).unwrap();
b.iter(|| symbol.split().last());
}

#[bench]
fn symbol_split_lowercase_long(b: &mut test::Bencher) {
let input = "abcabcabcabc".repeat(90);
let symbol = defenestrate::tokens::Symbol::new(&input, 0).unwrap();
let symbol = defenestrate::tokens::Identifier::new(&input, 0).unwrap();
b.iter(|| symbol.split().last());
}

#[bench]
fn symbol_split_mixed_short(b: &mut test::Bencher) {
let input = "abcABCAbc123";
let symbol = defenestrate::tokens::Symbol::new(input, 0).unwrap();
let symbol = defenestrate::tokens::Identifier::new(input, 0).unwrap();
b.iter(|| symbol.split().last());
}

#[bench]
fn symbol_split_mixed_long(b: &mut test::Bencher) {
let input = "abcABCAbc123".repeat(90);
let symbol = defenestrate::tokens::Symbol::new(&input, 0).unwrap();
let symbol = defenestrate::tokens::Identifier::new(&input, 0).unwrap();
b.iter(|| symbol.split().last());
}
54 changes: 52 additions & 2 deletions src/dict.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
use std::borrow::Cow;

use unicase::UniCase;

use crate::tokens::Case;

#[derive(Default)]
pub struct Dictionary {}

Expand All @@ -8,12 +12,16 @@ impl Dictionary {
Dictionary {}
}

pub fn correct_symbol<'s, 'w>(&'s self, _sym: crate::tokens::Symbol<'w>) -> Option<&'s str> {
pub fn correct_ident<'s, 'w>(
&'s self,
_ident: crate::tokens::Identifier<'w>,
) -> Option<Cow<'s, str>> {
None
}

pub fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<&'s str> {
pub fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>> {
map_lookup(&crate::dict_codegen::WORD_DICTIONARY, word.token())
.map(|s| case_correct(s, word.case()))
}
}

Expand All @@ -32,3 +40,45 @@ fn map_lookup(
map.get(&UniCase(key)).cloned()
}
}

/// Re-cases a dictionary `correction` so that it matches the casing of the typo it replaces.
///
/// * `Case::Lower` / `Case::None` — the correction is handed back borrowed and untouched.
/// * `Case::Title` — the first character is upper-cased; the remainder is copied verbatim.
/// * `Case::Scream` — every character is upper-cased.
///
/// Only the `Title` and `Scream` arms allocate; the other arms return a borrow of `correction`.
fn case_correct(correction: &str, case: Case) -> Cow<'_, str> {
    match case {
        Case::Lower | Case::None => Cow::Borrowed(correction),
        Case::Title => {
            let mut titled = String::with_capacity(correction.len());
            let mut rest = correction.chars();
            if let Some(first) = rest.next() {
                // Upper-casing a single char may expand to several chars (e.g. 'ß' -> "SS").
                titled.extend(first.to_uppercase());
                // After consuming the first char, `as_str` is exactly the untouched tail.
                titled.push_str(rest.as_str());
            }
            Cow::Owned(titled)
        }
        Case::Scream => Cow::Owned(correction.to_uppercase()),
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Table-driven check that `case_correct` re-cases a correction as each `Case` requires.
    #[test]
    fn test_case_correct() {
        // (correction, case of the typo, expected output)
        let cases = [
            ("foo", Case::Lower, "foo"),
            ("foo", Case::None, "foo"),
            ("foo", Case::Title, "Foo"),
            ("foo", Case::Scream, "FOO"),
            // Mixed-case corrections pass through untouched when no re-casing applies.
            ("fOo", Case::None, "fOo"),
        ];
        for &(correction, case, expected) in cases.iter() {
            let actual = case_correct(correction, case);
            assert_eq!(expected, actual);
        }
    }
}
12 changes: 6 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,29 @@ pub fn process_file(
File::open(path)?.read_to_end(&mut buffer)?;
for (line_idx, line) in grep_searcher::LineIter::new(b'\n', &buffer).enumerate() {
let line_num = line_idx + 1;
for symbol in tokens::Symbol::parse(line) {
if let Some(correction) = dictionary.correct_symbol(symbol) {
let col_num = symbol.offset();
for ident in tokens::Identifier::parse(line) {
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Message {
path,
line,
line_num,
col_num,
word: symbol.token(),
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg);
}
for word in symbol.split() {
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Message {
path,
line,
line_num,
col_num,
word: word.token(),
typo: word.token(),
correction,
non_exhaustive: (),
};
Expand Down
44 changes: 28 additions & 16 deletions src/report.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#[derive(Copy, Clone, Debug, Serialize)]
use std::borrow::Cow;
use std::io::{self, Write};

#[derive(Clone, Debug, Serialize)]
pub struct Message<'m> {
pub path: &'m std::path::Path,
#[serde(skip)]
pub line: &'m [u8],
pub line_num: usize,
pub col_num: usize,
pub word: &'m str,
pub correction: &'m str,
pub typo: &'m str,
pub correction: Cow<'m, str>,
#[serde(skip)]
pub(crate) non_exhaustive: (),
}
Expand All @@ -21,7 +24,7 @@ pub fn print_brief(msg: Message) {
msg.path.display(),
msg.line_num,
msg.col_num,
msg.word,
msg.typo,
msg.correction
);
}
Expand All @@ -31,23 +34,32 @@ pub fn print_long(msg: Message) {
let line_indent: String = itertools::repeat_n(" ", line_num.len()).collect();

let hl_indent: String = itertools::repeat_n(" ", msg.col_num).collect();
let hl: String = itertools::repeat_n("^", msg.word.len()).collect();
let hl: String = itertools::repeat_n("^", msg.typo.len()).collect();

println!("error: `{}` should be `{}`", msg.word, msg.correction);
println!(
let line = String::from_utf8_lossy(msg.line);
let line = line.replace("\t", " ");

let stdout = io::stdout();
let mut handle = stdout.lock();

writeln!(
handle,
"error: `{}` should be `{}`",
msg.typo, msg.correction
)
.unwrap();
writeln!(
handle,
" --> {}:{}:{}",
msg.path.display(),
msg.line_num,
msg.col_num
);
println!("{} |", line_indent);
println!(
"{} | {}",
msg.line_num,
String::from_utf8_lossy(msg.line).trim_end()
);
println!("{} | {}{}", line_indent, hl_indent, hl);
println!("{} |", line_indent);
)
.unwrap();
writeln!(handle, "{} |", line_indent).unwrap();
writeln!(handle, "{} | {}", msg.line_num, line.trim_end()).unwrap();
writeln!(handle, "{} | {}{}", line_indent, hl_indent, hl).unwrap();
writeln!(handle, "{} |", line_indent).unwrap();
}

pub fn print_json(msg: Message) {
Expand Down

0 comments on commit 5ef8153

Please sign in to comment.