In [None]:
import random

import numpy as np
from tqdm.notebook import tqdm

from words import allowed_words as wordle_words

In [None]:
# check a word against another word and extract the matching letters
# word - the word to check
# other_word - the word to check against


def check_word(word, other_word):
    """Compare ``word`` against ``other_word`` and report the matching letters.

    Returns a 4-tuple:

    * list of ``(letter, index)`` for letters of ``word`` equal to the letter
      of ``other_word`` at the same index;
    * list of ``(letter, index)`` for letters of ``word`` that differ from the
      letter at that index but occur somewhere in ``other_word``;
    * set of letters of ``word`` that occur in ``other_word``;
    * set of letters of ``word`` that do not occur in ``other_word``.

    Membership is tested with ``in``, so repeated letters are not counted
    separately. If the lengths differ, a warning is printed and
    ``(None, None, None, None)`` is returned.
    """
    if len(word) != len(other_word):
        print(f"!!! Word lengths don't match '{word}', '{other_word}'")
        return (None, None, None, None)

    # Split the letters of `word` by whether they appear anywhere in `other_word`.
    present = set()
    absent = set()
    for letter in word:
        if letter in other_word:
            present.add(letter)
        else:
            absent.add(letter)

    # Walk both words in lockstep to classify each position.
    exact = []
    misplaced = []
    for index, (ours, theirs) in enumerate(zip(word, other_word)):
        if ours == theirs:
            exact.append((ours, index))
        elif ours in present:
            misplaced.append((ours, index))

    return (exact, misplaced, present, absent)

In [None]:
# Filter the list of words by the guess
# words - the list of words to filter
# letters_correct_position - a list of (letter, position) tuples for letters known to be at that position
# letters_wrong_position - a list of (letter, position) tuples for letters known NOT to be at that position
# letters_in_word - a set of letters that we know are in the word
# letters_not_in_word - a set of letters that we know are not in the word
def filter_words(
    words,
    letters_correct_position,
    letters_wrong_position,
    letters_in_word,
    letters_not_in_word,
):
    """Return the words from ``words`` that satisfy every constraint.

    A word is kept when it:

    * contains every letter in ``letters_in_word``;
    * contains no letter from ``letters_not_in_word``;
    * has ``letter`` at ``position`` for each ``(letter, position)`` in
      ``letters_correct_position``;
    * does not have ``letter`` at ``position`` for each ``(letter, position)``
      in ``letters_wrong_position``.

    The original order of ``words`` is preserved in the returned list.
    """

    def is_possible(candidate):
        # Constraints are checked in the order listed in the docstring and
        # evaluation stops at the first one that fails.
        for letter in letters_in_word:
            if letter not in candidate:
                return False
        for letter in letters_not_in_word:
            if letter in candidate:
                return False
        for letter, position in letters_correct_position:
            if candidate[position] != letter:
                return False
        for letter, position in letters_wrong_position:
            if candidate[position] == letter:
                return False
        return True

    return [candidate for candidate in words if is_possible(candidate)]

In [None]:
# Score a word - the score is based on how good the word is at getting a 50/50 split
# words - the list of words we're trying to split
# word - the word to score
def score_word(word, words):
    """Score ``word`` by how close it comes to splitting ``words`` 50/50.

    Up to 100 words are drawn at random from ``words``; each is treated as
    the correct answer, ``word`` is checked against it, and ``words`` is
    filtered by the resulting feedback. The score is the sum, over the
    sampled answers, of ``abs(0.5 - fraction_of_words_remaining)``, so a
    lower score means a more even split. Because the answers are sampled
    randomly, repeated calls can return different scores.

    Sampled words whose length differs from ``word`` are skipped
    (``check_word`` prints a warning for them).
    """
    score = 0
    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more than len(words).
    sample_size = min(len(words), 100)
    for candidate_word in random.sample(words, sample_size):
        feedback = check_word(word, candidate_word)
        if feedback[0] is None:
            # Length mismatch: there is no usable feedback to filter with.
            continue
        matching_words = filter_words(words, *feedback)
        score += abs(0.5 - len(matching_words) / len(words))
    return score

In [None]:
def get_best_guess(words):
    """Pick a guess from ``words`` that minimises the worst-case remainder.

    Up to 50 candidate guesses and up to 50 possible answers are sampled at
    random from ``words``. For each guess, the feedback it would receive
    against every sampled answer is used to filter ``words``; the guess's
    score is the largest number of words left over any answer. The guess
    with the smallest such score is returned. Falls back to ``words[0]``
    when no guess could be compared with any answer, so ``words`` must be
    non-empty.
    """
    sample_size = min(len(words), 50)
    # the correct answer is in the list of words somewhere
    possibly_correct_answers = random.sample(words, sample_size)
    best_score = float("inf")
    best_guess = words[0]
    for candidate_guess in random.sample(words, sample_size):
        count = 0
        # Worst case seen so far; must start low because it is raised with max().
        score = 0
        for possibly_correct in possibly_correct_answers:
            if candidate_guess == possibly_correct:
                continue
            # Guess first, answer second: the same argument order used when
            # a real guess is checked against the real answer.
            feedback = check_word(candidate_guess, possibly_correct)
            if feedback[0] is None:
                continue
            filtered = filter_words(words, *feedback)
            score = max(score, len(filtered))
            count += 1
        # Ignore guesses that were never compared with any answer; their
        # score of 0 would otherwise win by default.
        if count and score < best_score:
            best_score = score
            best_guess = candidate_guess
    return best_guess

In [None]:
def guess(correct_answer, guess, words):
    """Play a game: narrow ``words`` down until at most one word is left.

    Starting with the opening ``guess``, each round checks the current guess
    against ``correct_answer``, filters ``words`` by the feedback, prints how
    many words remain, and, while more than one word remains, asks
    ``get_best_guess`` for the next guess. Prints the remaining word and
    returns the number of guesses that were checked.
    """
    guesses = 0
    while len(words) > 1:
        guesses += 1
        # Check the current guess against the answer and keep only the words
        # that are still consistent with the feedback.
        feedback = check_word(guess, correct_answer)
        words = filter_words(words, *feedback)
        print(f"After guessing {guess}, we have {len(words)} words left")
        if len(words) <= 1:
            break
        guess = get_best_guess(words)
    print(f"Found {words[0]} after {guesses} guesses")
    return guesses

In [None]:
# Pick a random word from the word list as the answer and run the solver
# against it, using "raise" as the opening guess.
correct_answer = random.choice(wordle_words)
print("Looking for", correct_answer)

guess(correct_answer, "raise", wordle_words)

In [None]:
# Rank every word as a guess: for each word, compute how many words would
# remain for every other possible answer, and use the standard deviation of
# those counts as the word's score (lower sorts first).
results = []
print(len(wordle_words))
for word in tqdm(wordle_words):
    # Number of words left after guessing `word`, one entry per other answer.
    scores = [
        len(filter_words(wordle_words, *check_word(word, candidate_word)))
        for candidate_word in wordle_words
        if word != candidate_word
    ]
    results.append((np.std(scores), word))
    # Re-sort and print after every word so the current ranking is visible
    # while the loop is still running.
    results.sort()
    print(
        "Top 5 words",
        ",".join(
            f"{ranked_word} ({round(std, 2)})" for std, ranked_word in results[:5]
        ),
    )