From cfe0f972a81d6d27960d08f5a93bb8495be72c78 Mon Sep 17 00:00:00 2001 From: emso-c <32599085+emso-c@users.noreply.github.com> Date: Thu, 15 Jul 2021 14:10:26 +0300 Subject: [PATCH] Solution to issue #34 --- better_profanity/better_profanity.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/better_profanity/better_profanity.py b/better_profanity/better_profanity.py index 3c4d3e9..0349447 100644 --- a/better_profanity/better_profanity.py +++ b/better_profanity/better_profanity.py @@ -54,7 +54,7 @@ def __init__(self, words=None): ## PUBLIC ## - def censor(self, text, censor_char="*", middle_only=False): + def censor(self, text, censor_char="*", middle_only=False, get_censored_words=False): """Replace the swear words in the text with `censor_char`.""" if not isinstance(text, str): @@ -64,7 +64,7 @@ def censor(self, text, censor_char="*", middle_only=False): if not self.CENSOR_WORDSET: self.load_censor_words() - return self._hide_swear_words(text, censor_char, middle_only) + return self._hide_swear_words(text, censor_char, middle_only, get_censored_words) def load_censor_words_from_file(self, filename, **kwargs): words = read_wordlist(filename) @@ -146,13 +146,14 @@ def _update_next_words_indices(self, text, words_indices, start_idx): words_indices += self._get_next_words(text, words_indices[-1][1], 1) return words_indices - def _hide_swear_words(self, text, censor_char, middle_only=False): + def _hide_swear_words(self, text, censor_char, middle_only=False, get_censored_words=False): """Replace the swear words with censor characters.""" censored_text = "" cur_word = "" skip_index = -1 next_words_indices = [] start_idx_of_next_word = self._get_start_index_of_next_word(text, 0) + censored_words = [] # If there are no words in the text, return the raw text without parsing if start_idx_of_next_word >= len(text) - 1: @@ -186,6 +187,7 @@ def _hide_swear_words(self, text, censor_char, middle_only=False): cur_word, next_words_indices, self.CENSOR_WORDSET ) if contains_swear_word: + censored_words.append(cur_word) if middle_only: cur_word = censor_middle_only(cur_word, censor_char) else: @@ -196,6 +198,7 @@ def _hide_swear_words(self, text, censor_char, middle_only=False): # If the current a swear word if cur_word.lower() in self.CENSOR_WORDSET: + censored_words.append(cur_word) if middle_only: cur_word = censor_middle_only(cur_word, censor_char) else: @@ -207,11 +210,15 @@ def _hide_swear_words(self, text, censor_char, middle_only=False): # Final check if cur_word != "" and skip_index < len(text) - 1: if cur_word.lower() in self.CENSOR_WORDSET: + censored_words.append(cur_word) if middle_only: cur_word = censor_middle_only(cur_word, censor_char) else: cur_word = get_replacement_for_swear_word(censor_char) censored_text += cur_word + + if get_censored_words: + return censored_text, censored_words return censored_text def _get_start_index_of_next_word(self, text, start_idx):