In [1]:
import fileinput
import string
from collections import defaultdict, Counter
from collections.abc import Callable
from itertools import filterfalse
import random

https://www.nytimes.com/games/wordle/index.html

In [2]:
def inc_exc(words: list[str], 
            letter_groups: dict[str, set[str]], 
            include: str, 
            exclude: str) -> list[str]:
    """Filters words based on the characters in include and exclude.
    
    If include and exclude are empty, inc_exc returns the same 
    list of words, albeit a copy of the original list with duplicate words removed.
    
    Args:
        words: A list of words.
        letter_groups: A mapping between a character in a word and 
            a subset from words that contain that character.
        include: A string of characters that must be contained in all the output words.
        exclude: A string of characters that must _not_ be contained in any output words.
        
    Returns:
        A list of filtered words where each word contains all the characters in include, but 
        does not have the characters listed in exclude.
    """
    include, exclude = set(include), set(exclude)
    possible = set(words)
    
    for ch in include:
        possible &= letter_groups[ch]

    print(f'Words that include {include}: {len(possible):,}')
    for ch in exclude:
        possible -= letter_groups[ch]

    print(f'Words that include {include}, but exclude {exclude}: {len(possible):,}')
    
    return list(possible)

def group_by_letter(words: list[str], char_set: str=string.ascii_lowercase) -> dict[str, set[str]]:
    """Groups words into sets based on what character is contained in the word.
    
    Args:
        words: A list of words.
        char_set: A string containing all the characters used to compose words.
        
    Returns:
        A mapping between each character in char_set and 
        a subset of words that contain that character.
    """
    letters = defaultdict(set)
    for ch in char_set:
        for word in words:
            if ch in word:
                letters[ch].add(word)
    return letters

class Possible(object):
    def __init__(self, iterable: list[str]):
        print(f'Sorting values')
        iterable.sort()
        self.values = iterable
    
    def distinct_letters_only(self):
        self.filter(lambda word: len(set(word)) == len(word))
        
    def filter(self, condition: Callable[[str], bool]):
        self.values = list(filter(condition, self.values))
        
    def __repr__(self):
        return f'Possible({self.values})'
    
    def __len__(self):
        return len(self.values)
        

In [3]:
words = [word.strip() for word in fileinput.input('words_alpha.txt')]
print(f'All words: {len(words):,}')

five_letter_words = [word for word in words if len(word) == 5]
five_letters_grouped = group_by_letter(five_letter_words)
five = lambda inc, exc: inc_exc(five_letter_words, five_letters_grouped, inc, exc)

print(f'Five-letter words: {len(five_letter_words):,}')

All words: 370,105
Five-letter words: 15,920


## Guesses

### Method 1: Uniformally random word

In [4]:
distinct_letters = list(filter(lambda word: len(set(word)) == len(word), five_letter_words))

In [5]:
random.choice(distinct_letters)

'liven'

### Method 2: Pick words with letters that occur the most

The end goal is to eliminate as many words as possible.  For the first guess, if we pick a word that has letters that occur the most, we can see if any of those letters can be discarded.  Because we used the most frequently used letters, we theoretically eliminate more words per letter using these letters.

In [6]:
hist = Counter()
for word in five_letter_words:
    for ch in word:
        hist[ch] += 1

In [7]:
hist.most_common(10)

[('a', 8393),
 ('e', 7802),
 ('s', 6537),
 ('o', 5219),
 ('r', 5144),
 ('i', 5067),
 ('l', 4247),
 ('t', 4189),
 ('n', 4044),
 ('u', 3361)]

In [8]:
possible = five('eson', '')
possible

Words that include {'o', 'n', 'e', 's'}: 64
Words that include {'o', 'n', 'e', 's'}, but exclude set(): 64


['onces',
 'cones',
 'omens',
 'owsen',
 'noser',
 'hones',
 'peons',
 'nodes',
 'secno',
 'tenso',
 'snoek',
 'seton',
 'kenos',
 'sones',
 'sonde',
 'noise',
 'lenos',
 'norse',
 'solen',
 'scone',
 'jones',
 'meson',
 'somne',
 'noses',
 'opens',
 'osone',
 'neons',
 'slone',
 'nomes',
 'nosed',
 'aeons',
 'onset',
 'noels',
 'steno',
 'seron',
 'soken',
 'nones',
 'bones',
 'shone',
 'omnes',
 'cosen',
 'nemos',
 'eosin',
 'doesn',
 'xenos',
 'notes',
 'senor',
 'pones',
 'senso',
 'snore',
 'ovens',
 'enows',
 'tones',
 'zones',
 'nosey',
 'segno',
 'ornes',
 'noose',
 'hosen',
 'snoke',
 'ebons',
 'stone',
 'genos',
 'enols']

## Filter words

In [9]:
print(f'Words with five letters: {len(five_letter_words):,}')

Words with five letters: 15,920


In [15]:
words = Possible(five('eram', 'livnscwb'))

words.filter(lambda word: word[3] == 'e')
words.filter(lambda word: word[2] != 'r')
words.filter(lambda word: word[4] == 'r' and word[0] != 'a' and word[1] != 'm')
# words.distinct_letters_only()

print(len(words))
words

Words that include {'m', 'e', 'a', 'r'}: 64
Words that include {'m', 'e', 'a', 'r'}, but exclude {'l', 'c', 'w', 'v', 'b', 's', 'n', 'i'}: 41
Sorting values
6


Possible(['gamer', 'maker', 'mater', 'mayer', 'mazer', 'tamer'])

## Save results

In [22]:
import sqlite3
from collections import namedtuple
import itertools

Result = namedtuple('Result', ['wordle', 'date', 'words', 'regular', 'high_contrast'])
def println(*objects, **kw):
    print(*objects, end='\n\n', **kw)
    
def print_results(results, show_rowid=False):
    def header(head, rowid):
        msg = f'{head}'
        if show_rowid:
            msg += f' rowid: {rowid}'
            
        return msg
        
    for rowid, *result in results:
        result = Result(*result)

        hi = result.high_contrast.splitlines()
        words = result.words.splitlines()
        
        print(header(hi[0], rowid))
        for line, word in itertools.zip_longest(itertools.islice(hi, 2, None), words, fillvalue=''):
            print(f'{line} {word}')
        print()
    
con = sqlite3.connect('wordle.db')
cur = con.cursor()

In [23]:
most_recent = cur.execute('''SELECT ROWID, * FROM results ORDER BY ROWID DESC''').fetchall()[:5]
print_results(most_recent)

Wordle 335 4/6*
â¬œâ¬œâ¬œðŸŸ§â¬œ liven
â¬œâ¬œðŸŸ¦ðŸŸ§â¬œ screw
ðŸŸ¦ðŸŸ¦â¬œðŸŸ§ðŸŸ§ amber
ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ gamer

Wordle 334 4/6*
ðŸŸ¦â¬œâ¬œðŸŸ¦â¬œ apple
â¬œðŸŸ¦ðŸŸ¦â¬œðŸŸ§ malus
ðŸŸ¦â¬œðŸŸ§â¬œðŸŸ§ loans
ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ glass

Wordle 333 3/6*
â¬œðŸŸ¦â¬œðŸŸ¦â¬œ noise
ðŸŸ§â¬œâ¬œâ¬œðŸŸ¦ salvo
ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ scour

Wordle 332 5/6*
â¬œâ¬œâ¬œâ¬œðŸŸ¦ psoae
â¬œâ¬œâ¬œðŸŸ¦â¬œ xylem
â¬œðŸŸ§â¬œâ¬œâ¬œ hertz
â¬œðŸŸ§ðŸŸ§ðŸŸ¦ðŸŸ¦ feign
ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ being

Wordle 331 4/6*
â¬œâ¬œâ¬œâ¬œðŸŸ§ stoae
â¬œâ¬œâ¬œâ¬œðŸŸ§ knife
â¬œâ¬œâ¬œðŸŸ§ðŸŸ§ curve
ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ delve



### Create table

Only need to do once.  Uncomment to execute.

In [18]:
# cur.execute('''
# CREATE TABLE results 
# (wordle, date, words, regular, high_contrast)
# ''')
# con.commit()

### Save result

To keep it simple, everything is stored as TEXT.

In [19]:
cur.execute('''
INSERT INTO results VALUES (
'335', '2022-05-20', 
'liven
screw
amber
gamer', 
'Wordle 335 4/6*

â¬œâ¬œâ¬œðŸŸ©â¬œ
â¬œâ¬œðŸŸ¨ðŸŸ©â¬œ
ðŸŸ¨ðŸŸ¨â¬œðŸŸ©ðŸŸ©
ðŸŸ©ðŸŸ©ðŸŸ©ðŸŸ©ðŸŸ©', 
'Wordle 335 4/6*

â¬œâ¬œâ¬œðŸŸ§â¬œ
â¬œâ¬œðŸŸ¦ðŸŸ§â¬œ
ðŸŸ¦ðŸŸ¦â¬œðŸŸ§ðŸŸ§
ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§ðŸŸ§'
)''')

con.commit()

### SQL utilities

In [20]:
# cur.execute('''DELETE FROM results WHERE wordle='325' ''')
# con.commit()

In [21]:
# close connection to database
con.close()