Skip to content

Commit

Permalink
Make some cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
ManyTheFish committed Mar 13, 2022
1 parent 61503a2 commit e8a83b0
Showing 1 changed file with 21 additions and 12 deletions.
33 changes: 21 additions & 12 deletions src/alphabets/latin.rs
Expand Up @@ -113,7 +113,8 @@ pub fn alphabet_calculate_scores(text: &LowercaseText, filter_list: &FilterList)

// score of each character.
let mut max_raw_score = 0;
let mut scores: Vec<_> = chars.iter().map(|_| 0).collect();
let mut char_scores = vec![0; chars.len()];
// iterate over the text and score each character.
for ch in text.chars() {
if is_stop_char(ch) {
continue;
Expand All @@ -122,43 +123,51 @@ pub fn alphabet_calculate_scores(text: &LowercaseText, filter_list: &FilterList)
max_raw_score += 1;

if let Ok(position) = chars.binary_search(&ch) {
scores[position] += 2;
// add 2 and remove max_raw_score at the end,
// to keep the score interval of -max_raw_score..max_raw_score
char_scores[position] += 2;
}
}

let mut raw_scores: Vec<_> = (0..Lang::all().len()).into_iter().map(|_| 0).collect();
let mut lang_scores = vec![0; Lang::all().len()];

let mut common_score = 0;
for (position, char_score) in scores.into_iter().enumerate() {
// iterate over the scored characters to compute each language's score.
let mut common_score: usize = 0;
for (position, char_score) in char_scores.into_iter().enumerate() {
if char_score > 0 {
let languages = &langs[position];
// if current character is common to all Languages, increment a common score
// instead of iterating over all Languages scores.
if languages.len() == LATIN_ALPHABETS.len() {
common_score += char_score;
} else {
for lang in languages {
raw_scores[*lang as usize] += char_score;
for &lang in languages {
lang_scores[lang as usize] += char_score;
}
}
}
}

// remap languages with their scores.
let mut raw_scores: Vec<(Lang, usize)> = Script::Latin
.langs()
.iter()
.filter(|&&l| filter_list.is_allowed(l))
.map(|&l| (l, raw_scores[l as usize]))
.map(|&l| {
(
l,
(lang_scores[l as usize] + common_score).saturating_sub(max_raw_score),
)
})
.collect();

raw_scores.sort_unstable_by(|a, b| b.1.cmp(&a.1));

let mut normalized_scores = vec![];

for (lang, raw_score) in raw_scores.iter_mut() {
*raw_score = (*raw_score + common_score).saturating_sub(max_raw_score);
let normalized_score = *raw_score as f64 / max_raw_score as f64;
normalized_scores.push((*lang, normalized_score));
for &(lang, raw_score) in raw_scores.iter() {
let normalized_score = raw_score as f64 / max_raw_score as f64;
normalized_scores.push((lang, normalized_score));
}

RawOutcome {
Expand Down

0 comments on commit e8a83b0

Please sign in to comment.