In [1]:
import fileinput
import string
from collections import defaultdict, Counter
from itertools import filterfalse
import random

https://www.nytimes.com/games/wordle/index.html

In [9]:
def inc_exc(words, letter_groups, include, exclude):
    """Narrow ``words`` to those with every ``include`` letter and no ``exclude`` letter.

    Args:
        words: iterable of candidate words.
        letter_groups: mapping from a letter to the set of words that contain
            it (for example the result of ``group_by_letter``).
        include: iterable of letters that must all appear in a word.
        exclude: iterable of letters that must not appear in a word.

    Returns:
        The set of words that satisfy both constraints.

    The number of remaining words is printed after each of the two steps.
    """
    include, exclude = set(include), set(exclude)
    possible = set(words)
    no_words = frozenset()

    for ch in include:
        # A letter without an entry is contained in no word.  Using .get()
        # keeps plain dicts from raising KeyError and keeps a defaultdict
        # from growing an empty entry as a side effect of the lookup.
        possible &= letter_groups.get(ch, no_words)

    print(f'Words that include {include}: {len(possible):,}')
    for ch in exclude:
        possible -= letter_groups.get(ch, no_words)

    print(f'Words that include {include}, but exclude {exclude}: {len(possible):,}')

    return possible

def group_by_letter(words):
    """Index ``words`` by the lowercase ASCII letters they contain.

    Returns a ``defaultdict(set)`` mapping each letter of
    ``string.ascii_lowercase`` that occurs in at least one word to the set of
    words containing it.  Letters that occur in no word get no entry.
    """
    index = defaultdict(set)
    for letter in string.ascii_lowercase:
        containing = {entry for entry in words if letter in entry}
        # Only store letters that actually occur, so the key set matches
        # the letters present in the word list.
        if containing:
            index[letter] = containing
    return index

In [31]:
# Read the word list, one entry per line, stripping surrounding whitespace.
# A context manager closes the file deterministically and avoids the
# module-level state that fileinput.input() keeps between calls.
with open('words_alpha.txt') as word_file:
    words = [line.strip() for line in word_file]
print(f'All words: {len(words):,}')

# Keep only the five-letter words and index them by the letters they contain.
five_letter_words = [word for word in words if len(word) == 5]
five_letters_grouped = group_by_letter(five_letter_words)


def five(inc, exc):
    """Return the five-letter words with every letter of ``inc`` and none of ``exc``."""
    return inc_exc(five_letter_words, five_letters_grouped, inc, exc)


print(f'Five-letter words: {len(five_letter_words):,}')

All words: 370,105
Five-letter words: 15,920


## Guesses

### Method 1: Uniformly random word

In [21]:
# Five-letter words in which no letter is repeated.
distinct_letters = [word for word in five_letter_words if len(set(word)) == len(word)]

In [5]:
# Pick one of the distinct-letter words at random.  No seed is set in the
# cells shown here, so the word differs from run to run.
random.choice(distinct_letters)

'fused'

### Method 2: Pick words with letters that occur the most

The goal of each guess is to eliminate as many candidate words as possible.  For the first guess, pick a word made up of the most frequently occurring letters: any of those letters that Wordle rules out removes every word containing it.  Because the most common letters appear in the most words, each letter that is ruled out should, in theory, eliminate more words than a rarer letter would.

In [22]:
# Count how often each character occurs across all five-letter words
# (a letter repeated within a word is counted each time it appears).
hist = Counter(ch for word in five_letter_words for ch in word)

In [23]:
# The ten most frequent letters with their occurrence counts.
hist.most_common(10)

[('a', 8393),
 ('e', 7802),
 ('s', 6537),
 ('o', 5219),
 ('r', 5144),
 ('i', 5067),
 ('l', 4247),
 ('t', 4189),
 ('n', 4044),
 ('u', 3361)]

In [32]:
# Words containing all five of the most frequent letters (a, e, s, o, r),
# with no letters excluded.
possible = five('aesor', '')
possible

Words that include {'o', 'a', 'r', 'e', 's'}: 3
Words that include {'o', 'a', 'r', 'e', 's'}, but exclude set(): 3


{'arose', 'oreas', 'seora'}

## Filter words

In [58]:
# Size of the full five-letter candidate pool, before any filtering.
print(f'Words with five letters: {len(five_letter_words):,}')

Words with five letters: 15,920


In [62]:
# Start from the words that contain r, o and e and none of the letters
# ruled out so far, then apply the positional clues in a single pass.
possible = [
    word
    for word in sorted(five('roe', 'ascdtwbxv'))
    # Positions where a required letter is known NOT to be.
    if word[1] != 'r' and word[2] != 'o' and word[4] != 'e'
    # Positions whose letter is already fixed.
    if word[1] == 'o' and word[3] == 'e' and word[4] == 'r'
    if word[0] == 'h'
]

print(len(possible))
possible

Words that include {'o', 'e', 'r'}: 414
Words that include {'o', 'e', 'r'}, but exclude {'b', 'c', 't', 'w', 'a', 'v', 'x', 'd', 's'}: 106
5


['hoker', 'holer', 'homer', 'honer', 'hoper']

## Save results

In [109]:
import sqlite3
from collections import namedtuple
import itertools

# One saved game.  Field order matches the order in which the values are
# passed to Result(*row) when rows are read back from the results table.
Result = namedtuple(
    typename='Result',
    field_names=['wordle', 'date', 'words', 'regular', 'high_contrast'],
)
def println(*objects, **kw):
    """Print ``objects`` like ``print`` does, followed by a blank line.

    Accepts the same keyword arguments as ``print``.  ``end`` defaults to
    two newlines; an explicit ``end`` passed by the caller is used as given
    instead of raising a duplicate-keyword ``TypeError``.
    """
    kw.setdefault('end', '\n\n')
    print(*objects, **kw)
    
def print_results(results):
    """Print each saved game with its guesses next to the high-contrast grid.

    ``results`` is an iterable of 5-item rows in ``Result`` field order.
    For every row this prints a ``rowid`` line, the first line of the
    high-contrast text as a header, and then one line per guess pairing a
    grid row with the word that produced it.

    Note: the number shown as ``rowid`` is the 1-based position of the row
    in ``results``; it is not read from the database.
    """
    for position, row in enumerate(results, start=1):
        record = Result(*row)
        println(f'rowid: {position:3}')

        grid_lines = record.high_contrast.splitlines()
        title = grid_lines[0]
        guesses = record.words.splitlines()

        print(title)
        # Skip the header and the line that follows it; the remaining lines
        # are paired one-to-one with the guessed words.
        for squares, guess in zip(grid_lines[2:], guesses):
            print(f'{squares} {guess}')
        print()
    
# Open the SQLite database file holding the saved results.  The connection
# and cursor are module-level and reused by the cells below.
con = sqlite3.connect('wordle.db')
cur = con.cursor()

In [110]:
# Fetch every stored row and print it in the side-by-side format.
print_results(cur.execute('''SELECT * FROM results''').fetchall())

rowid:   1

Wordle 319 2/6*
⬜🟦🟧⬜🟦 unapt
🟧🟧🟧🟧🟧 train

rowid:   2

Wordle 320 6/6*
⬜🟦🟦⬜🟦 arose
⬜🟧⬜🟧🟧 coder
⬜🟧⬜🟧🟧 tower
⬜🟧⬜🟧🟧 boxer
🟧🟧⬜🟧🟧 hover
🟧🟧🟧🟧🟧 homer



### Create table

This only needs to be done once.  Uncomment to execute.

In [65]:
# cur.execute('''
# CREATE TABLE results 
# (wordle, date, words, regular, high_contrast)
# ''')
# con.commit()

### Save result

To keep it simple, everything is stored as TEXT.

In [68]:
# Bind the values as parameters instead of embedding multi-line literals in
# the SQL text: quoting is handled by sqlite3 and the statement can be reused
# for the next game by editing only the values below.
cur.execute(
    'INSERT INTO results VALUES (?, ?, ?, ?, ?)',
    (
        '320',         # wordle
        '2022-05-05',  # date
        # words: one guess per line, in the order played
        '\n'.join(['arose', 'coder', 'tower', 'boxer', 'hover', 'homer']),
        # regular: share text with the default colours
        '\n'.join([
            'Wordle 320 6/6*',
            '',
            '⬜🟨🟨⬜🟨',
            '⬜🟩⬜🟩🟩',
            '⬜🟩⬜🟩🟩',
            '⬜🟩⬜🟩🟩',
            '🟩🟩⬜🟩🟩',
            '🟩🟩🟩🟩🟩',
        ]),
        # high_contrast: share text with the high-contrast colours
        '\n'.join([
            'Wordle 320 6/6*',
            '',
            '⬜🟦🟦⬜🟦',
            '⬜🟧⬜🟧🟧',
            '⬜🟧⬜🟧🟧',
            '⬜🟧⬜🟧🟧',
            '🟧🟧⬜🟧🟧',
            '🟧🟧🟧🟧🟧',
        ]),
    ),
)

con.commit()

### SQL utilities

In [76]:
# cur.execute('''DELETE FROM results WHERE rowid=3''')
# con.commit()

In [111]:
# Close the connection to the database once all queries above are done.
con.close()