In [1]:
# setup
from __future__ import annotations
from time import time
import random
from collections import defaultdict
# get the words
from nltk.corpus import words
from english_words import english_words_lower_set 

# fivers = [word for word in words.words() if len(word)==5]
fivers = [word for word in english_words_lower_set if len(word)==5]
print(len(fivers))

3210


In [2]:
freqs = defaultdict(int)
for word in fivers:
    for letter in word:
        freqs[letter]+=1
        
print(freqs)
freq_sorted_fivers = sorted(fivers, key = lambda w: sum([freqs[l] for l in set(w)]), reverse=True)
print(freq_sorted_fivers[:10])

defaultdict(<class 'int'>, {'h': 535, 'a': 1550, 't': 947, 'c': 656, 'e': 1679, 'l': 1004, 'o': 1053, 'x': 67, 'r': 1126, 'm': 467, 'v': 195, 'i': 947, 's': 958, 'y': 585, 'p': 466, 'u': 611, 'g': 410, 'n': 869, 'd': 513, 'b': 423, 'w': 232, 'f': 274, 'j': 58, 'k': 298, "'": 15, 'z': 72, 'q': 38, '.': 2})
['arose', 'erato', 'orate', 'lares', 'alert', 'later', 'alter', 'aires', 'aster', 'raise']


In [3]:
def get_word() -> str:
    return random.choice(fivers)
#     return "splat" # placeholder

# returns a list evaluating guess by index. 
# 0 = not in word
# 1 = in word, wrong loc
# 2 = in word, right loc
def guess_word(guess: str, word: str) -> list[int]:
    assert len(guess) == len(word) == 5
    check = [0]*5
    for i in range(len(guess)):
        if guess[i] not in word:
            continue
        check[i]+=1
        if guess[i] == word[i]:
            check[i]+=1
            
    return check

def generate_guess(
    word: str,
    past_guesses: list[str], 
    past_checks: list[list[int]],
    word_set: list[str],
    possible_words: list[str],
    use_novelty_filt: bool) -> str:
    
    
    contained = set() #set, equiv to yellows
    located = {} #dict{idx:char}, equiv to greens
    not_contained = set() # bad guesses

    for g in range(len(past_guesses)):
        for i in range(len(past_guesses[g])):
            guess = past_guesses[g][i]
            check = past_checks[g][i]
            if check == 0:
                not_contained.add(guess)
            if check >= 1:
                contained.add(guess)
            if check == 2:
                located[i]=guess

    all_letters = contained.union(not_contained)
    def novelty_filt(word):
        wset = set(word)
        return wset.isdisjoint(all_letters)
        
    def filt(word):
        # fail words without placed greens
        for idx, c in located.items():
            if word[idx] != c:
                return False
        # fail words which don't contain the yellows
        wset = set(word)
        if not wset >= contained:
            return False
        if not wset.isdisjoint(not_contained): # don't reuse known bad letters
            return False
        
        if word in (past_guesses):
            return False
        
        return True
    
    #filter possible_words in place to reduce list size each round
    for idx in range(len(possible_words))[::-1]:
        if not filt(possible_words[idx]):
            possible_words.pop(idx)
    
    if use_novelty_filt and len(contained)<5:
        common_novel = filter(novelty_filt, freq_sorted_fivers)
        for guess in common_novel:
            if len(set(guess))==5: # choose the first word with 5 uniqe letters
                return guess
            
    return random.choice(possible_words)

# recursion, whee!
def guess_until_right(
    word: str,
    past_guesses: list[str], 
    past_checks: list[list[int]],
    word_set: list[str],
    possible_words: list[str],
    novelty_rnds: bool) -> tuple[str,list,list]:

    
    current_guess = generate_guess(word, past_guesses, past_checks, word_set, possible_words, len(past_guesses)<novelty_rnds)
    current_check = guess_word(current_guess, word)
#     print(f"{current_guess}: {current_check}")
    past_guesses.append(current_guess)
    past_checks.append(current_check)
    
    if sum(current_check) == 10:
        return word, past_guesses, past_checks
    else:
        return guess_until_right(word, past_guesses, past_checks, word_set, possible_words, novelty_rnds)
    
def play_game(novelty_rnds):
    word = get_word()
    possible_words = [word for word in fivers]
    
    return guess_until_right(word, [], [], fivers, possible_words, novelty_rnds)
    

In [4]:
start = time()
rounds = 250
tries = 0
for _ in range(rounds):
    word, guesses, checks = play_game(3)
    tries += len(guesses)
#     print(f"guessed {w} in {len(gs)} tries")
print(f"finished {rounds} rounds in {time()-start}s, avergaging {tries/rounds} guesses/round")

finished 250 rounds in 1.5652432441711426s, avergaging 4.396 guesses/round


In [None]:

for i in range(5):
    start = time()
    rounds = 2500
    tries = 0
    for _ in range(rounds):
        word, guesses, checks = play_game(i)
        tries += len(guesses)
    #     print(f"guessed {w} in {len(gs)} tries")
    print(f"finished {rounds} rounds in {time()-start}s, avergaging {tries/rounds} guesses/round, using {i} novel guesses")