Skip to content

Commit

Permalink
Maximum one sentence extracting should not return full pool and fix p…
Browse files Browse the repository at this point in the history
…ossible off-by-one error
  • Loading branch information
MichaelKohler committed Apr 17, 2021
1 parent d24d060 commit 5e9c58c
Showing 1 changed file with 31 additions and 29 deletions.
60 changes: 31 additions & 29 deletions src/extractor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,13 @@ fn pick_sentences(
) -> Vec<String> {
let total_in_pool = sentences_pool.len();

if total_in_pool < amount && amount != std::usize::MAX {
// We do not extract anything if the pool size does not exceed the
// requested amount (unless the amount is std::usize::MAX).
// This makes sure that we handle legal requirements correctly,
// such as not using the full corpus of a source.
if total_in_pool <= amount && amount != std::usize::MAX {
return vec![];
}

if total_in_pool == 1 {
return sentences_pool;
}

let mut iteration = 0;
let mut chosen_sentences = vec![];
let mut used_indexes = vec![];
Expand Down Expand Up @@ -193,19 +192,24 @@ mod test {
}

#[test]
fn test_pick_sentences_pool_one() {
fn test_pick_sentences_none_valid() {
let rules : Rules = Rules {
..Default::default()
};
let existing_sentences = HashSet::new();
let sentences = vec![String::from("Test")];
let amount = 1;
let sentences = vec![
String::from("Test"),
String::from("Test2"),
String::from("Test3"),
String::from("Test4"),
];
let amount = 3;

assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_true)[0], "Test");
assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_false).len(), 0);
}

#[test]
fn test_pick_sentences_none_valid() {
fn test_pick_sentences_only_pick_amount() {
let rules : Rules = Rules {
..Default::default()
};
Expand All @@ -218,11 +222,11 @@ mod test {
];
let amount = 3;

assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_false).len(), 0);
assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_true).len(), 3);
}

#[test]
fn test_pick_sentences_only_pick_amount() {
fn test_pick_sentences_all_if_max_amount() {
let rules : Rules = Rules {
..Default::default()
};
Expand All @@ -232,10 +236,23 @@ mod test {
String::from("Test2"),
String::from("Test3"),
String::from("Test4"),
String::from("Test5"),
];
let amount = 3;
let amount = std::usize::MAX;

assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_true).len(), 3);
assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_true).len(), 5);
}

#[test]
fn test_pick_sentences_never_all_from_pool_if_not_max() {
    // A pool holding a single sentence combined with a requested amount of one
    // must produce no picks: the whole pool is never handed back unless the
    // amount is std::usize::MAX.
    let rules = Rules::default();
    let existing_sentences = HashSet::new();
    let sentences = vec!["Test".to_string()];
    let amount = sentences.len();

    let picked = pick_sentences(&rules, sentences, &existing_sentences, amount, check_true);

    assert!(picked.is_empty());
}

#[test]
Expand Down Expand Up @@ -307,19 +324,4 @@ mod test {

assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_true).len(), 2);
}

// Expects both sentences back when the pool size equals the requested amount.
// NOTE(review): this encodes the behaviour from before the commit. The hunk
// header (19 old lines -> 4 new lines) indicates this test is deleted, and the
// `total_in_pool <= amount` guard shown in the first hunk would make
// `pick_sentences` return an empty vector for these inputs - confirm this
// test is not meant to be kept.
#[test]
fn test_pick_sentences_two_out_of_two() {
let rules : Rules = Rules {
..Default::default()
};
let existing_sentences = HashSet::new();
let sentences = vec![
String::from("Test"),
String::from("Test2"),
];
// Requested amount equals the pool size (2 of 2).
let amount = 2;

assert_eq!(pick_sentences(&rules, sentences, &existing_sentences, amount, check_true).len(), 2);
}
}

0 comments on commit 5e9c58c

Please sign in to comment.