Add 'censor middle only' feature

emso-c · Jul 15, 2021 · 7c5cd79 · 7c5cd79
1 parent fe977dd
commit 7c5cd79
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 better_profanity.egg-info/
 build/
 dist/
+
+env/
diff --git a/better_profanity/better_profanity.py b/better_profanity/better_profanity.py
@@ -8,6 +8,7 @@
     get_complete_path_of_file,
     get_replacement_for_swear_word,
     read_wordlist,
+    censor_middle_only
 )
 from .varying_string import VaryingString
 
@@ -53,7 +54,7 @@ def __init__(self, words=None):
 
     ## PUBLIC ##
 
-    def censor(self, text, censor_char="*"):
+    def censor(self, text, censor_char="*", middle_only=False):
         """Replace the swear words in the text with `censor_char`."""
 
         if not isinstance(text, str):
@@ -63,7 +64,7 @@ def censor(self, text, censor_char="*"):
 
         if not self.CENSOR_WORDSET:
             self.load_censor_words()
-        return self._hide_swear_words(text, censor_char)
+        return self._hide_swear_words(text, censor_char, middle_only)
 
     def load_censor_words_from_file(self, filename, **kwargs):
         words = read_wordlist(filename)
@@ -145,7 +146,7 @@ def _update_next_words_indices(self, text, words_indices, start_idx):
                 words_indices += self._get_next_words(text, words_indices[-1][1], 1)
         return words_indices
 
-    def _hide_swear_words(self, text, censor_char):
+    def _hide_swear_words(self, text, censor_char, middle_only=False):
         """Replace the swear words with censor characters."""
         censored_text = ""
         cur_word = ""
@@ -185,22 +186,31 @@ def _hide_swear_words(self, text, censor_char):
                 cur_word, next_words_indices, self.CENSOR_WORDSET
             )
             if contains_swear_word:
-                cur_word = get_replacement_for_swear_word(censor_char)
+                if middle_only:
+                    cur_word = censor_middle_only(cur_word, censor_char)
+                else:
+                    cur_word = get_replacement_for_swear_word(censor_char)
                 skip_index = end_index
                 char = ""
                 next_words_indices = []
 
             # If the current a swear word
             if cur_word.lower() in self.CENSOR_WORDSET:
-                cur_word = get_replacement_for_swear_word(censor_char)
+                if middle_only:
+                    cur_word = censor_middle_only(cur_word, censor_char)
+                else:
+                    cur_word = get_replacement_for_swear_word(censor_char)
 
             censored_text += cur_word + char
             cur_word = ""
 
         # Final check
         if cur_word != "" and skip_index < len(text) - 1:
             if cur_word.lower() in self.CENSOR_WORDSET:
-                cur_word = get_replacement_for_swear_word(censor_char)
+                if middle_only:
+                    cur_word = censor_middle_only(cur_word, censor_char)
+                else:
+                    cur_word = get_replacement_for_swear_word(censor_char)
             censored_text += cur_word
         return censored_text
 

diff --git a/better_profanity/utils.py b/better_profanity/utils.py
@@ -18,8 +18,14 @@ def read_wordlist(filename: str):
                 yield row
 
 
-def get_replacement_for_swear_word(censor_char):
-    return censor_char * 4
+def get_replacement_for_swear_word(censor_char, n=4):
+    """Return `censor_char` repeated `n` times (4 by default)."""
+    replacement = n * censor_char
+    return replacement
+
+
+def censor_middle_only(word, censor_char):
+    """Censor every character of `word` except its first and last.
+
+    Words of two characters or fewer have no middle to hide, so they are
+    replaced entirely by two censor characters.
+    """
+    if len(word) <= 2:
+        # Honor the caller's censor character instead of a hard-coded '**'.
+        return get_replacement_for_swear_word(censor_char, 2)
+    middle = get_replacement_for_swear_word(censor_char, len(word) - 2)
+    return word[0] + middle + word[-1]
 
 
 def any_next_words_form_swear_word(cur_word, words_indices, censor_words):