In [None]:
! wget https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt -O words.txt
! wget http://www.mieliestronk.com/corncob_lowercase.txt -O words_common.txt

In [1]:
# Read the dictionary into a list.
with open('words_common.txt') as file:
    ALL_WORDS = []
    for line in file:
        ALL_WORDS.append(line.rstrip())

print(f'There are {len(ALL_WORDS)} words')
print(f'Word #1237 is "{ALL_WORDS[1237]}".')

There are 58110 words
Word #1237 is "airtime".


In [2]:
def filter_by_length(words, length):
    return set(filter(lambda x: len(x) == length, words))

for i in range(1,15):
    print(f'{i}-letter words: {len(filter_by_length(ALL_WORDS, i))}')

1-letter words: 0
2-letter words: 47
3-letter words: 589
4-letter words: 2294
5-letter words: 4266
6-letter words: 6936
7-letter words: 9203
8-letter words: 9395
9-letter words: 7696
10-letter words: 6377
11-letter words: 4557
12-letter words: 3101
13-letter words: 1880
14-letter words: 924


In [3]:
def filter_by_minimum(words, letter, minimum=1):
    return set(filter(lambda x: x.count(letter) >= minimum, words))

def filter_by_maximum(words, letter, maximum):
    return set(filter(lambda x: x.count(letter) <= maximum, words))

print(f'Words with at least 4 z\'s in them: {filter_by_minimum(ALL_WORDS, "z", minimum=4)}.')

print(f'Words with at least 4 z\'s in them AND no p\'s: {filter_by_maximum(filter_by_minimum(ALL_WORDS, "z", minimum=4), "p", maximum=0)}.')

Words with at least 4 z's in them: {'razzmatazz'}.
Words with at least 4 z's in them AND no p's: {'razzmatazz'}.


In [4]:
def filter_by_position(words, letter, position):
    return set(filter(lambda x: position < len(x) and x[position] == letter, words))

def filter_by_not_position(words, letter, position):
    return set(filter(lambda x: position >= len(x) or x[position] != letter, words))

print(f'Words with an "x" in the 17th place: {filter_by_position(ALL_WORDS, "x", 16)}')
print(f'Words with an "x" in the 17th place without an "h" in the first place: {filter_by_not_position(filter_by_position(ALL_WORDS, "x", 16), "h", 0)}')

Words with an "x" in the 17th place: set()
Words with an "x" in the 17th place without an "h" in the first place: set()


In [5]:
import random
import string

class Wordle:

    def __init__(self, length=5, word=None):
        if word is None:
            self.word = random.choice(list(filter_by_length(ALL_WORDS, length)))
        else:
            self.word = word

    def __get_letter_count(self):
        return {x: self.word.count(x) for x in string.ascii_lowercase}

    def guess(self, guess):
        assert(len(guess) == len(self.word))
        letter_count = self.__get_letter_count()
        result = [None] * len(self.word)
        # Add correct guesses to result first.
        for i, letter in enumerate(guess):
            if self.word[i] == letter:
                result[i] = (0, letter)
                letter_count[letter] -= 1
        ## Add incorrect guesses to result.
        for i, letter in enumerate(guess):
            if result[i] == None:
                if letter_count[letter] > 0:
                    result[i] = (1, letter)
                else:
                    result[i] = (2, letter)
                letter_count[letter] -= 1
        return result


In [6]:
from termcolor import colored

def pretty_guess_result(guess_result):
    colors = {0: 'green', 1: 'yellow', 2: 'red'}
    result = ''
    for guess in guess_result:
        result += colored(guess[1], colors[guess[0]])
    return result

game = Wordle()

print(f'The word is {game.word}.')
print(pretty_guess_result(game.guess('skier')))
print(pretty_guess_result(game.guess('watch')))
print(pretty_guess_result(game.guess('bumpy')))
print(pretty_guess_result(game.guess('lodge')))


The word is shift.
[32ms[0m[31mk[0m[32mi[0m[31me[0m[31mr[0m
[31mw[0m[31ma[0m[33mt[0m[31mc[0m[33mh[0m
[31mb[0m[31mu[0m[31mm[0m[31mp[0m[31my[0m
[31ml[0m[31mo[0m[31md[0m[31mg[0m[31me[0m


In [7]:
import math

def update_words_remaining(words, guess_result):
    min_count = {}
    max_count = {}
    for i, letter_guess in enumerate(guess_result):
        score = letter_guess[0]
        letter = letter_guess[1]
        # Filter correct letters
        if score == 0:
            # print(f'...filtering words without "{letter}" in position {i}')
            words = filter_by_position(words, letter, i)
        # Count correct and partially correct letters
        if score <= 1:
            min_count[letter] = min_count.get(letter, 0) + 1
            if letter in max_count:
                max_count[letter] = min_count[letter]
        elif score == 2:
            max_count[letter] = min_count.get(letter, 0)
    for letter in min_count.keys():
        # print(f'...filtering by minimum {min_count[letter]} of "{letter}"')
        words = filter_by_minimum(words, letter, min_count[letter])
    for letter in max_count.keys():
        # print(f'...filtering by maximum {max_count[letter]} of "{letter}"')
        words = filter_by_maximum(words, letter, max_count[letter])
    return words

def entropy(words, all_words, depth, explore, detail):
    best_score = math.inf
    best_word = None
    num_words = len(words)
    assert(num_words > 0)
    explore_words = random.sample(list(all_words), min(len(all_words), explore))
    explore_words = list(filter(lambda x: x not in words, explore_words))
    word_list = random.sample(list(words), min(len(words), depth)) + explore_words
    for guess in word_list:
        score = 0
        skip = False
        answer_list = random.sample(list(words), min(detail, len(words)))
        entropic_guesses = 0
        for answer in answer_list:
            guess_score = (len(update_words_remaining(words, Wordle(word=answer).guess(guess))) / num_words) / detail
            score += guess_score
            if guess_score > 0:
                entropic_guesses += 1
            if score > best_score:
                skip = True
                break
        score *= detail
        if entropic_guesses == 0:
            score = math.inf
        else:
            score /= entropic_guesses
        if not skip:
            if guess in words:
                print(colored(f'{guess}: {score}', 'blue'))
        if not skip and (score < best_score or (score == best_score and guess not in explore_words)):
            best_score = score
            best_word = guess
    return (best_word, best_score)

# entropy(filter_by_length(ALL_WORDS, 5), all_words=None, depth=2000, detail=100)




In [8]:
def entropy_single_word(word, all_words, detail=None):
    if detail is None:
        detail = len(all_words)
    detail = min(len(all_words), detail)
    score = 0
    answer_list = random.sample(list(all_words), detail)
    for answer in answer_list:
        score += (len(update_words_remaining(all_words, Wordle(word=answer).guess(word))) / len(all_words)) / detail
    return score

In [9]:
best = (None, 0)

five_letter_words = filter_by_length(ALL_WORDS, 5)
for i, word in enumerate(five_letter_words):
    if i % 100 == 0:
        print(f'{100 * i // len(five_letter_words)}% - {i}/{len(five_letter_words)}')
    score = entropy_single_word(word, all_words=five_letter_words, detail=10)
    if score >= best[1]:
        best = (word, score)
        print(best)

0% - 0/4266
('tykes', 0.053375527426160335)
('cameo', 0.07890295358649789)
('inked', 0.08066104078762305)
('gauge', 0.09123300515705578)
('brink', 0.10916549460853257)
('hoggs', 0.1454758556024379)
('chord', 0.15314111579934367)
('runny', 0.16903422409751523)
('mouth', 0.24430379746835443)
('jazzy', 0.2829113924050633)
2% - 100/4266
('doggy', 0.2887013595874356)
4% - 200/4266
7% - 300/4266
9% - 400/4266
('popup', 0.349648382559775)
11% - 500/4266
14% - 600/4266
('fluff', 0.43544303797468353)
16% - 700/4266
18% - 800/4266
('ninny', 0.44334270979840595)
21% - 900/4266
23% - 1000/4266
25% - 1100/4266
28% - 1200/4266
30% - 1300/4266
32% - 1400/4266
35% - 1500/4266
37% - 1600/4266
39% - 1700/4266
42% - 1800/4266
44% - 1900/4266
46% - 2000/4266
49% - 2100/4266
51% - 2200/4266
53% - 2300/4266
56% - 2400/4266
58% - 2500/4266
60% - 2600/4266
('bobby', 0.46697140178152835)
63% - 2700/4266
65% - 2800/4266
('puppy', 0.4983122362869199)
67% - 2900/4266
70% - 3000/4266
72% - 3100/4266
75% - 3200/426

In [10]:
import heapq as hq

iterations = [(200, 20), (100, 40), (50, 100), (20, 1000), (5, 5000)]

five_letter_words = filter_by_length(ALL_WORDS, 5)
words = set.copy(five_letter_words)
for params in iterations:
    print(f'Beginning iteration {params}...')
    max_count = params[0]
    detail = params[1]

    min_heap = []
    prev_percent = None
    for i, word in enumerate(words):
        percent = (100 * i) // len(words)
        if percent % 10 == 0 and percent != prev_percent:
            print(f'{100 * i // len(words)}% - {i}/{len(words)}')
        prev_percent = percent
        score = entropy_single_word(word, all_words=five_letter_words, detail=detail)
        hq.heappush(min_heap, (score, word))

    words = set()
    print(hq.nsmallest(10, min_heap))
    while len(min_heap) != 0 and len(words) < max_count:
        words.add(hq.heappop(min_heap)[1])

Beginning iteration (200, 20)...
0% - 0/4266
10% - 427/4266
20% - 854/4266
30% - 1280/4266
40% - 1707/4266
50% - 2133/4266
60% - 2560/4266
70% - 2987/4266
80% - 3413/4266
90% - 3840/4266
[(0.026265822784810124, 'earls'), (0.028645100796999532, 'caste'), (0.029582747304266292, 'holes'), (0.029946085325832157, 'dries'), (0.030356305672761368, 'spear'), (0.03103609939052977, 'males'), (0.03158696671354899, 'reals'), (0.031586966713549, 'stole'), (0.03185654008438818, 'snarl'), (0.032079231129864036, 'riles')]
Beginning iteration (100, 40)...
0% - 0/200
10% - 20/200
20% - 40/200
30% - 60/200
40% - 80/200
50% - 100/200
60% - 120/200
70% - 140/200
80% - 160/200
90% - 180/200
[(0.02740857946554149, 'terns'), (0.027432020628223166, 'least'), (0.02851031411157994, 'aloes'), (0.029406938584153778, 'aster'), (0.02955930614158462, 'raise'), (0.030584857008907642, 'earls'), (0.031417018284106896, 'aeons'), (0.03175105485232068, 'leans'), (0.03179793717768402, 'pares'), (0.0318272386310361, 'rains')

KeyboardInterrupt: 

In [None]:
"""
[(0.03448389549263692, 'saner'), (0.03451587570051452, 'earls'), (0.035463687737776604, 'stare'),
(0.03500750271062498, 'raise'), (0.035267355636835744, 'laser'), (0.036971977645065136, 'snare'),
(0.03614521783796628, 'nears'), (0.035189932762437426, 'aloes'), (0.03516102968796359, 'tales'),
(0.03724023773932701, 'arose'), (0.037562897156267205, 'roles'), (0.037007474576834, 'slate'),
(0.03809090027911765, 'riles'), (0.03714979199677189, 'arise'), (0.03748921080100174, 'soler')]
"""
BEST_FIRST_5_LETTER_CHOICES = [x[1] for x in min_heap]
print(BEST_FIRST_5_LETTER_CHOICES)

In [None]:
class Agent:
    def __init__(self, length=5):
        self.starting_words = filter_by_length(ALL_WORDS, length)
        self.words_remaining = filter_by_length(ALL_WORDS, length)

    def guess(self):
        return random.choice(list(self.words_remaining))

    def guess_smart(self, depth, explore, detail):
        return entropy(self.words_remaining, self.starting_words, depth, explore, detail)[0]
    
    def update_with_guess_result(self, guess_result):
        self.words_remaining = update_words_remaining(self.words_remaining, guess_result)
        

def play_game_simple(length=5):
    game = Wordle(length=length)
    agent = Agent(length=length)

    turn = 0
    print(f'Starting a game with a {length}-letter word.')
    while True:
        if turn == 0 and length == 5:
            word = random.choice(BEST_FIRST_5_LETTER_CHOICES)
        else:
            word = agent.guess_smart(depth=1000, explore=1000, detail=100)
        result = game.guess(word)
        agent.update_with_guess_result(result)
        print(pretty_guess_result(result))
        if word == game.word:
            break
        turn += 1


    print(f'The word was: "{game.word}".')

def play_game_manual(length=5):
    agent = Agent(length=length)

    print(f'Starting a game with a {length}-letter word.')
    while True:
        word = agent.guess()
        print(word)
        input('...')
        score = input('Enter the score: ')
        if score.isnumeric():
            guess_result = []
            for i, letter in enumerate(word):
                guess_result.append((int(score[i]), letter))
            agent.update_with_guess_result(guess_result)
            print(pretty_guess_result(guess_result))


play_game_simple(length=5)
# play_game_manual(length=10)

In [None]:
ALL_WORDS = ['went', 'vent', 'bent', 'dent', 'ment', 'cats', 'dogs', 'leap', 'poop', 'meow', 'aoet', 'trip', 'xxxx', 'wvbd', 'bump', 'ququ']



In [None]:
"""
Words remaining:
boy bat bit cat car cot coy

Letter -->  Split
------------------
b _ _  -->  3 : 4
c _ _  -->  4 : 3
_ o _  -->  3 : 4
_ a _  -->  3 : 4
_ i _  -->  1 : 6
_ _ y  -->  2 : 5
_ _ t  -->  4 : 3
_ _ r  -->  1 : 6

len = 7
half = 3.5
3
"""
