Skip to content

Commit

Permalink
Add 'censor middle only' feature
Browse files Browse the repository at this point in the history
  • Loading branch information
emso-c committed Jul 15, 2021
1 parent fe977dd commit 7c5cd79
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
better_profanity.egg-info/
build/
dist/

env/
22 changes: 16 additions & 6 deletions better_profanity/better_profanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
get_complete_path_of_file,
get_replacement_for_swear_word,
read_wordlist,
censor_middle_only
)
from .varying_string import VaryingString

Expand Down Expand Up @@ -53,7 +54,7 @@ def __init__(self, words=None):

## PUBLIC ##

def censor(self, text, censor_char="*"):
def censor(self, text, censor_char="*", middle_only=False):
"""Replace the swear words in the text with `censor_char`."""

if not isinstance(text, str):
Expand All @@ -63,7 +64,7 @@ def censor(self, text, censor_char="*"):

if not self.CENSOR_WORDSET:
self.load_censor_words()
return self._hide_swear_words(text, censor_char)
return self._hide_swear_words(text, censor_char, middle_only)

def load_censor_words_from_file(self, filename, **kwargs):
words = read_wordlist(filename)
Expand Down Expand Up @@ -145,7 +146,7 @@ def _update_next_words_indices(self, text, words_indices, start_idx):
words_indices += self._get_next_words(text, words_indices[-1][1], 1)
return words_indices

def _hide_swear_words(self, text, censor_char):
def _hide_swear_words(self, text, censor_char, middle_only=False):
"""Replace the swear words with censor characters."""
censored_text = ""
cur_word = ""
Expand Down Expand Up @@ -185,22 +186,31 @@ def _hide_swear_words(self, text, censor_char):
cur_word, next_words_indices, self.CENSOR_WORDSET
)
if contains_swear_word:
cur_word = get_replacement_for_swear_word(censor_char)
if middle_only:
cur_word = censor_middle_only(cur_word, censor_char)
else:
cur_word = get_replacement_for_swear_word(censor_char)
skip_index = end_index
char = ""
next_words_indices = []

# If the current word is a swear word
if cur_word.lower() in self.CENSOR_WORDSET:
cur_word = get_replacement_for_swear_word(censor_char)
if middle_only:
cur_word = censor_middle_only(cur_word, censor_char)
else:
cur_word = get_replacement_for_swear_word(censor_char)

censored_text += cur_word + char
cur_word = ""

# Final check
if cur_word != "" and skip_index < len(text) - 1:
if cur_word.lower() in self.CENSOR_WORDSET:
cur_word = get_replacement_for_swear_word(censor_char)
if middle_only:
cur_word = censor_middle_only(cur_word, censor_char)
else:
cur_word = get_replacement_for_swear_word(censor_char)
censored_text += cur_word
return censored_text

Expand Down
10 changes: 8 additions & 2 deletions better_profanity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@ def read_wordlist(filename: str):
yield row


def get_replacement_for_swear_word(censor_char):
    """Return the mask for a swear word: `censor_char` repeated four times."""
    mask_width = 4
    mask = censor_char * mask_width
    return mask
def get_replacement_for_swear_word(censor_char, n=4):
    """Return `censor_char` repeated `n` times.

    Args:
        censor_char: The string used to mask a swear word.
        n: Number of repetitions. Defaults to 4.

    Returns:
        The mask string.
    """
    return censor_char * n


def censor_middle_only(word, censor_char):
    """Mask every character of `word` except its first and last.

    The result has as many mask repetitions as characters replaced, so with a
    single-character `censor_char` the output is the same length as `word`.

    Args:
        word: The word to censor.
        censor_char: The string used in place of each hidden character.

    Returns:
        `word` with its interior masked. Words of two characters or fewer
        have no interior, so they are masked entirely.
    """
    word_length = len(word)
    if word_length <= 2:
        # Nothing to keep visible: mask the whole word with the caller's
        # censor character instead of a hard-coded "**".
        return get_replacement_for_swear_word(censor_char, word_length)
    masked_middle = get_replacement_for_swear_word(censor_char, word_length - 2)
    return word[0] + masked_middle + word[-1]


def any_next_words_form_swear_word(cur_word, words_indices, censor_words):
Expand Down

0 comments on commit 7c5cd79

Please sign in to comment.