# Find Candidate Words

Suppose you are working on a puzzle that is one of the many variants of Wordle and are stuck on a word - maybe you know some of the letters but can't figure out how to arrange them to find the correct word.

Below is a method you can use to create word hints.

In [1]:
# Import the necessary libraries
import itertools
import numpy as np
import requests
import json

In [2]:
# We will use this API to obtain a list of possible matching words given the known letters in your word.
# The URL ends with the `sp=` query parameter so that a search pattern can be appended directly to it.
# Full documentation of the API endpoints and syntax is located at https://www.datamuse.com/api/
api_url = "https://api.datamuse.com/words?sp="

In [3]:
# Number of letters in the word you want to find
word_length = 5  # Typical for Wordle

# What is already known about the word, as (letter, position) pairs:
#   - letter is a single alphabetic character
#   - position is a 0-based index between 0 and word_length - 1,
#     or -1 when the letter is known to be in the word but its position is not
#
# Example: "i" is known to be in position 1 and "c" in position 4, while
# "a" and "l" are known to be in the word without knowing the exact position.
# The actual word is "lilac"
known_letters_loc = [
    ('a', -1),
    ('i', 1),
    ('c', 4),
    ('l', -1),
]

In [4]:
# Keep only the entries whose position is known (non-negative),
# ordered by that position
fixed_letters = sorted(
    (entry for entry in known_letters_loc if entry[1] >= 0),
    key=lambda entry: entry[1],
)
fixed_letters

[('i', 1), ('c', 4)]

In [5]:
# The API call only needs the letters themselves, so drop the positions
known_letters = [entry[0] for entry in known_letters_loc]
known_letters

['a', 'i', 'c', 'l']

In [6]:
# To call the API, we need to pass all possible orderings (permutations) of the letters which we know exist.
# A repeated letter would produce identical permutations, so duplicates are dropped
# (dict.fromkeys keeps the first-seen order) to avoid sending the same query more than once.
# Note: the number of permutations grows factorially with the number of known letters.
known_combos = list(dict.fromkeys(itertools.permutations(known_letters)))
known_combos

[('a', 'i', 'c', 'l'),
 ('a', 'i', 'l', 'c'),
 ('a', 'c', 'i', 'l'),
 ('a', 'c', 'l', 'i'),
 ('a', 'l', 'i', 'c'),
 ('a', 'l', 'c', 'i'),
 ('i', 'a', 'c', 'l'),
 ('i', 'a', 'l', 'c'),
 ('i', 'c', 'a', 'l'),
 ('i', 'c', 'l', 'a'),
 ('i', 'l', 'a', 'c'),
 ('i', 'l', 'c', 'a'),
 ('c', 'a', 'i', 'l'),
 ('c', 'a', 'l', 'i'),
 ('c', 'i', 'a', 'l'),
 ('c', 'i', 'l', 'a'),
 ('c', 'l', 'a', 'i'),
 ('c', 'l', 'i', 'a'),
 ('l', 'a', 'i', 'c'),
 ('l', 'a', 'c', 'i'),
 ('l', 'i', 'a', 'c'),
 ('l', 'i', 'c', 'a'),
 ('l', 'c', 'a', 'i'),
 ('l', 'c', 'i', 'a')]

In [7]:
# Turn each permutation into an API query pattern: the letters in that order,
# with a "*" before, between and after them
known_patterns = ['*{}*'.format('*'.join(combo)) for combo in known_combos]
known_patterns

['*a*i*c*l*',
 '*a*i*l*c*',
 '*a*c*i*l*',
 '*a*c*l*i*',
 '*a*l*i*c*',
 '*a*l*c*i*',
 '*i*a*c*l*',
 '*i*a*l*c*',
 '*i*c*a*l*',
 '*i*c*l*a*',
 '*i*l*a*c*',
 '*i*l*c*a*',
 '*c*a*i*l*',
 '*c*a*l*i*',
 '*c*i*a*l*',
 '*c*i*l*a*',
 '*c*l*a*i*',
 '*c*l*i*a*',
 '*l*a*i*c*',
 '*l*a*c*i*',
 '*l*i*a*c*',
 '*l*i*c*a*',
 '*l*c*a*i*',
 '*l*c*i*a*']

In [8]:
# Now, loop through each of the query patterns calling the API and collect
# every returned word that has the required length.
# A single Session is used so the requests can reuse the same connection.
candidate_words = []
with requests.Session() as session:
    for pattern in known_patterns:
        # The timeout keeps the notebook from hanging forever on a stalled connection
        resp = session.get(api_url + pattern, timeout=10)
        # Fail loudly on an HTTP error instead of trying to parse an error response as JSON
        resp.raise_for_status()
        # The API will return many words that do not match our required length,
        # so we filter out the non-matching lengths before adding them to the list
        candidate_words.extend(
            w['word'] for w in resp.json() if len(w['word']) == word_length
        )

print(candidate_words)

['calic', 'alice', 'lilac', 'iliac', 'ileac', 'cavil', 'cilia', 'claim', 'clair', 'cilia', 'lilac', 'iliac', 'linac', 'lucia']


In [9]:
# To filter on letters whose position is known, split every candidate word into
# its letters and stack them into a numpy array (one row per word, one column
# per letter position), which is syntactically easier to filter.
# np.unique with axis=0 also removes words that were returned more than once.
letter_rows = [list(word) for word in candidate_words]
candidate_arr = np.unique(np.array(letter_rows), axis=0)
candidate_arr

array([['a', 'l', 'i', 'c', 'e'],
       ['c', 'a', 'l', 'i', 'c'],
       ['c', 'a', 'v', 'i', 'l'],
       ['c', 'i', 'l', 'i', 'a'],
       ['c', 'l', 'a', 'i', 'm'],
       ['c', 'l', 'a', 'i', 'r'],
       ['i', 'l', 'e', 'a', 'c'],
       ['i', 'l', 'i', 'a', 'c'],
       ['l', 'i', 'l', 'a', 'c'],
       ['l', 'i', 'n', 'a', 'c'],
       ['l', 'u', 'c', 'i', 'a']], dtype='<U1')

In [10]:
# Loop through each of the known letter positions and keep only the words
# that have this letter in its position in the word
for letter, position in fixed_letters:
    # Boolean mask over the rows: True where the word has `letter` at `position`
    candidate_arr = candidate_arr[candidate_arr[:, position] == letter]

# Convert the array of letters remaining into words remaining and display them
list_candidate_words = [''.join(row) for row in candidate_arr]
print(list_candidate_words)

['lilac', 'linac']
