In [6]:
#-----------------------------------------------------------------------
# import needed libraries
import re
import nltk
from nltk.corpus import words


#-----------------------------------------------------------------------
# Download the NLTK "words" corpus package so it can be read through nltk.corpus.words.
# When the package is already present, nltk just reports it as up-to-date.
nltk.download("words")


#-----------------------------------------------------------------------
# define function with parameters

def filter_words(
    word_list, length = None,
    fixed_positions = None,
    contains_letters = None,
    excludes_letters=None,
    not_in_positions=None
    ):
    """Return the lowercased words from ``word_list`` that pass every given filter.

    All positions are 1-based. Letter comparisons are case-insensitive: the
    words are lowercased first and the filter letters are lowercased (or
    matched case-insensitively) before comparing. Filters that are ``None``
    or empty are skipped.

    Parameters
    ----------
    word_list : iterable of str
        Candidate words.
    length : int, optional
        Keep only words with exactly this many characters.
    fixed_positions : dict[int, str], optional
        ``{position: letter}``; the word must have ``letter`` at ``position``.
        When ``length`` is given, positions outside ``1..length`` are ignored.
        When ``length`` is omitted, words too short to reach the highest
        position are dropped.
    contains_letters : iterable of str, optional
        Letters that must all occur somewhere in the word.
    excludes_letters : iterable of str, optional
        Letters that must not occur anywhere in the word.
    not_in_positions : dict[int, str], optional
        ``{position: letter}``; the word must not have ``letter`` at
        ``position``. Positions a word does not have cannot hold the letter,
        so they never exclude that word.

    Returns
    -------
    list of str
        Matching words, lowercased, in input order. Entries are not
        de-duplicated, so inputs that differ only by case appear repeatedly.
    """

    # Convert all words to lowercase
    filtered_words = [word.lower() for word in word_list]

    # Filter by length
    if length:
        filtered_words = [word for word in filtered_words if len(word) == length]

    # Filter by fixed positions
    if fixed_positions:
        # Without an explicit length the pattern only has to reach the
        # highest constrained position and is matched as a prefix.
        span = length if length else max(fixed_positions)
        regex = ''.join(
            # Escape the letter so characters such as '.' are taken literally.
            re.escape(fixed_positions[i]) if i in fixed_positions else '.'
            for i in range(1, span + 1)
        )
        pattern = re.compile(regex, re.IGNORECASE)
        matches = pattern.fullmatch if length else pattern.match
        filtered_words = [word for word in filtered_words if matches(word)]

    # Filter by containing letters
    if contains_letters:
        required = [letter.lower() for letter in contains_letters]
        filtered_words = [
            word for word in filtered_words if all(letter in word for letter in required)
        ]

    # Filter by excluded letters
    if excludes_letters:
        forbidden = [letter.lower() for letter in excludes_letters]
        filtered_words = [
            word for word in filtered_words if not any(letter in word for letter in forbidden)
        ]

    # Filter by letters not in specific positions
    if not_in_positions:
        for pos, letter in not_in_positions.items():
            banned = letter.lower()
            # The range check avoids IndexError on short words and stops
            # position 0 from wrapping around to the last character.
            filtered_words = [
                word for word in filtered_words
                if not (1 <= pos <= len(word) and word[pos - 1] == banned)
            ]

    return filtered_words

[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


In [9]:
#-----------------------------------------------------------------------
# provide values
if __name__ == "__main__":
    # Candidate vocabulary: the NLTK "words" corpus
    word_list = words.words()

    # Constraints for this query (positions are 1-based)
    word_length = 5
    fixed_positions = {3: 'n'}    # letter required at a position, e.g. {3: 'a', 4: 't'}
    not_in_positions = {4: 'n'}   # letter ruled out at a position
    contains_letters = {'n', 'u'}
    excludes_letters = {'a', 'd', 'e', 'g', 'h', 'i', 'm', 'o', 'r', 's', 't'}

    # Apply every constraint in one call
    results = filter_words(
        word_list,
        length=word_length,
        fixed_positions=fixed_positions,
        contains_letters=contains_letters,
        excludes_letters=excludes_letters,
        not_in_positions=not_in_positions,
    )

In [10]:
#-----------------------------------------------------------------------
# show the words that passed every filter
print("Filtered words:", results)

Filtered words: ['funky', 'nunky', 'punky']


In [11]:
#--Example word "prime"
# provide values
if __name__ == "__main__":
    # Candidate vocabulary: the NLTK "words" corpus
    word_list = words.words()

    # Constraints for this query (positions are 1-based)
    word_length = 5
    fixed_positions = {3: 'i'}    # letter required at a position, e.g. {3: 'a', 4: 't'}
    not_in_positions = {4: 'r'}   # letter ruled out at a position
    contains_letters = {'i', 'p', 'r'}
    excludes_letters = {'a', 'b', 'd', 'g', 'h', 'n', 'o', 's', 't', 'u', 'z'}

    # Apply every constraint in one call
    results = filter_words(
        word_list,
        length=word_length,
        fixed_positions=fixed_positions,
        contains_letters=contains_letters,
        excludes_letters=excludes_letters,
        not_in_positions=not_in_positions,
    )

In [12]:
#-----------------------------------------------------------------------
# show the words that passed every filter
print("Filtered words:", results)

Filtered words: ['crimp', 'plier', 'price', 'price', 'prick', 'prier', 'prill', 'prime', 'primp', 'primy', 'privy', 'price']


In [13]:
#--Example word "logic"
# provide values
if __name__ == "__main__":
    # Candidate vocabulary: the NLTK "words" corpus
    word_list = words.words()

    # Constraints for this query (positions are 1-based)
    word_length = 5
    fixed_positions = {1: 'l', 4: 'i'}   # letter required at a position, e.g. {3: 'a', 4: 't'}
    not_in_positions = {5: 'r'}          # letter ruled out at a position
    contains_letters = {'i', 'l', 'o'}
    excludes_letters = {'a', 'b', 'd', 'e', 'f', 'h', 'j', 'n', 's', 't', 'u', 'z'}

    # Apply every constraint in one call
    results = filter_words(
        word_list,
        length=word_length,
        fixed_positions=fixed_positions,
        contains_letters=contains_letters,
        excludes_letters=excludes_letters,
        not_in_positions=not_in_positions,
    )

In [14]:
#-----------------------------------------------------------------------
# show the words that passed every filter
print("Filtered words:", results)

Filtered words: ['logic', 'loric', 'loxic']
