In [1]:
import re
from pathlib import Path

### Read the forbidden words

In [2]:
def read_forbidden_words(filename):
    """
    Load the list of forbidden words stored in a UTF-8 text file.

    The file content is split on any whitespace (spaces, tabs, newlines),
    so words may be laid out one per line or several per line.

    Args:
        filename: Path of the file to read (anything ``open`` accepts).

    Returns:
        A list with the words in file order. If the file does not exist,
        an error message is printed and an empty list is returned.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as handle:
            raw = handle.read()
    except FileNotFoundError:
        print(f"Error: File {filename} not found.")
        return []

    # str.split() with no arguments already ignores leading/trailing
    # whitespace and never yields empty strings.
    return raw.split()

# Load the word list up front and echo it so the reader can see what will be censored.
forbidden_words = read_forbidden_words(Path("data") / "forbidden_words.txt")
print(f"Forbidden words: {forbidden_words}")

Forbidden words: ['python', 'program', 'la', 'od']


### Function to censor forbidden words in text

In [3]:
def censor_text(text, forbidden_words):
    """
    Replace every occurrence of each forbidden word (case-insensitive) with asterisks.

    Matching is done on substrings, so a forbidden word is also censored when
    it appears inside a longer word. All occurrences are located in the
    original ``text`` before anything is replaced, so the result does not
    depend on the order of ``forbidden_words`` and overlapping occurrences
    are masked in full.

    Args:
        text: The string to censor.
        forbidden_words: Iterable of words to hide. Each word is matched
            literally (regex metacharacters are escaped); empty strings
            are ignored.

    Returns:
        A string of the same length as ``text`` in which every character
        that belongs to at least one forbidden-word occurrence is replaced
        by ``'*'``.
    """
    # One flag per character: True means "this character must be hidden".
    hidden = [False] * len(text)

    for word in forbidden_words:
        if not word:
            # An empty pattern would match at every position and hide nothing.
            continue

        # The word sits inside a zero-width lookahead so that finditer()
        # advances one character at a time and also reports occurrences
        # that overlap each other (e.g. "aa" inside "aaa").
        pattern = re.compile('(?=(' + re.escape(word) + '))', re.IGNORECASE)

        for match in pattern.finditer(text):
            start, end = match.span(1)
            hidden[start:end] = [True] * (end - start)

    return ''.join('*' if is_hidden else char for char, is_hidden in zip(text, hidden))

### Read the original text and censor it

In [4]:
def process_files(words_filename, forbidden_filename):
    """
    Censor the contents of a text file using a forbidden-words file.

    Args:
        words_filename: Path of the UTF-8 text file to censor.
        forbidden_filename: Path of the file listing the forbidden words.

    Returns:
        A tuple ``(original_text, censored_text)`` on success, or ``None``
        when no forbidden words could be loaded or the text file does not
        exist (an error message is printed in the latter case).
    """
    banned = read_forbidden_words(forbidden_filename)
    if not banned:
        # Nothing to censor with (missing or empty word list).
        return None

    try:
        with open(words_filename, 'r', encoding='utf-8') as source:
            original = source.read()
    except FileNotFoundError:
        print(f"Error: File {words_filename} not found.")
        return None

    return original, censor_text(original, banned)

# Run the pipeline on the notebook's data files; `result` is None on failure.
result = process_files(
    Path("data") / "words.txt",
    Path("data") / "forbidden_words.txt",
)

# Unpack only on success so later cells can guard on `result`.
if result:
    original_text = result[0]
    censored_text = result[1]

### Display the results

In [5]:
if result:
    # Only the censored version is shown; the unmodified input remains
    # available in `original_text` if it needs to be inspected.
    print("\nCensored text:", "=" * 50, censored_text, sep="\n")


Censored text:
****** is a high-level, interpreted *******ming **nguage that has gained immense popu**rity in recent years. It was first created in the **te 1980s by Guido van Rossum, and since then, it has become one of the most widely used *******ming **nguages in the world.
****** is known for its simplicity and ease of use, which makes it an ideal **nguage for beginners. It has a straightforward syntax and requires less c**e than other *******ming **nguages, which means that developers can focus on solving problems rather than writing long lines of c**e. ******_******_*******ming


### Save the censored text to a new file

In [6]:
if result:
    # Write the censored text next to the input files, overwriting any previous run.
    censored_file = Path("data") / "censored_words.txt"
    censored_file.write_text(censored_text, encoding='utf-8')
    print(f"\n✅ Censored text saved to {censored_file}")


✅ Censored text saved to data/censored_words.txt
