Skip to content

Commit

Permalink
Solution to issue snguyenthanh#34
Browse files (browse the repository at this point in the history)
  • Loading branch information
emso-c committed Jul 15, 2021
1 parent 7c5cd79 commit 18fada2
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions better_profanity/better_profanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, words=None):

## PUBLIC ##

def censor(self, text, censor_char="*", middle_only=False):
def censor(self, text, censor_char="*", middle_only=False, get_censored_words=False):
"""Replace the swear words in the text with `censor_char`."""

if not isinstance(text, str):
Expand All @@ -64,7 +64,7 @@ def censor(self, text, censor_char="*", middle_only=False):

if not self.CENSOR_WORDSET:
self.load_censor_words()
return self._hide_swear_words(text, censor_char, middle_only)
return self._hide_swear_words(text, censor_char, middle_only, get_censored_words)

def load_censor_words_from_file(self, filename, **kwargs):
words = read_wordlist(filename)
Expand Down Expand Up @@ -146,13 +146,14 @@ def _update_next_words_indices(self, text, words_indices, start_idx):
words_indices += self._get_next_words(text, words_indices[-1][1], 1)
return words_indices

def _hide_swear_words(self, text, censor_char, middle_only=False):
def _hide_swear_words(self, text, censor_char, middle_only=False, get_censored_words=False):
"""Replace the swear words with censor characters."""
censored_text = ""
cur_word = ""
skip_index = -1
next_words_indices = []
start_idx_of_next_word = self._get_start_index_of_next_word(text, 0)
censored_words = []

# If there are no words in the text, return the raw text without parsing
if start_idx_of_next_word >= len(text) - 1:
Expand Down Expand Up @@ -186,6 +187,7 @@ def _hide_swear_words(self, text, censor_char, middle_only=False):
cur_word, next_words_indices, self.CENSOR_WORDSET
)
if contains_swear_word:
censored_words.append(cur_word)
if middle_only:
cur_word = censor_middle_only(cur_word, censor_char)
else:
Expand All @@ -196,6 +198,7 @@ def _hide_swear_words(self, text, censor_char, middle_only=False):

# If the current word is a swear word
if cur_word.lower() in self.CENSOR_WORDSET:
censored_words.append(cur_word)
if middle_only:
cur_word = censor_middle_only(cur_word, censor_char)
else:
Expand All @@ -207,11 +210,15 @@ def _hide_swear_words(self, text, censor_char, middle_only=False):
# Final check
if cur_word != "" and skip_index < len(text) - 1:
if cur_word.lower() in self.CENSOR_WORDSET:
censored_words.append(cur_word)
if middle_only:
cur_word = censor_middle_only(cur_word, censor_char)
else:
cur_word = get_replacement_for_swear_word(censor_char)
censored_text += cur_word

if get_censored_words:
return censored_text, censored_words
return censored_text

def _get_start_index_of_next_word(self, text, start_idx):
Expand Down

0 comments on commit 18fada2

Please sign in to comment.