In [1]:
## Not great, has names

with open('words.txt', 'r') as f:
    allwords = f.read().split()

In [73]:
import numpy as np

ALLCAPS = {l:u for (l,u) in zip('abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')}

## BASIC HELPER FUNC
def chunks(w):
    return [e for e in w]

def compare(w1, w2):
    """First round of comparing: positional comparison"""
    return [a==b for (a,b) in zip(w1, w2)]

def count_c(c, w):
    """counts characters in w (can be list or str)"""
    return len([e for e in w if e==c])

def has_valid_char(word, char_bank):
    """Check if word has chars found in char_bank"""
    return len(set(word) - char_bank) == 0

def is_unordered_sublist(sub, super_):
    """check if sub is contained in super_"""
    bools = []
    for c in set(sub):
        if c in super_ and count_c(c, sub) == count_c(c, super_):
            bools.append(1)
        else:
            bools.append(0)
            
    return 0 not in bools

## INFO THEORY
def entropy(word):
    prob_log_prob = []
    for c in set(word):
        p = count_c(c, word)/len(word)
        prob_log_prob.append(p*np.log(p))
    return -sum(prob_log_prob)

## CLASSES
class Wordle:
    """class to simulate Wordle"""
    def __init__(self, word):
        self.word = word
        self.length = len(word)
        self.history = []
        self.solved = False
    
    def show_flags(self, guess, flags):
        """print guess along with its flags side by side"""
        for c in guess:
                print(ALLCAPS[c], '', end='')
        print('\n')
        for f in flags:
            print(f, '', end='')
    
    def verify(self, guess):
        # include try/catch to make sure green letters are there? (not in game)
        flags = []
        c_encounter = {c:count_c(c, self.word) for c in set(self.word)}
        if guess == self.word:
            print('correct!')
            flags = ['+' for i in range(self.length)]
            self.solved = True
            return
        word2word = compare(guess, self.word)
        for ix, b in enumerate(word2word):
            char = guess[ix]
            ## GREEN right character, right position
            if b:
                flags.append('+') #green
                c_encounter[char] -= 1
            else:
                ## YELLOW right character, wrong position
                ## accounts for character count. e.g., if character count of 1
                ## and shown yellow flag, should be gray on its next encounter in the guess word
                if char in self.word and c_encounter[char] > 0: 
                    flags.append('!') #yellow
                    c_encounter[char] -= 1
                ## GRAY no such character
                else:
                    flags.append('-')
        
        self.history.append(flags)
        self.show_flags(guess, flags)
        return flags
    
class WordleInference:
    """Contains features/inference data"""
    def __init__(self, length, word_bank):
        self.optimal = ['_' for i in range(length)] # fixes green letters
        self.word_bank = word_bank
        self.priority_char = [] #accounts for repeated char (hence a list)
        self.char_bank = {c for c in 'abcdefghijklmnopqrstuvwxyz'}
        self.null_opt = ''.join(self.optimal) # _____
        
    def update_priors(self, guess, flags):
        # theoretically like gradient descent
#         print(100, guess)
        for ix, (c, f) in enumerate(zip(guess, flags)):
            if f == '+': # Green
                self.optimal[ix] = c
            elif f == '!': # Yellow
                self.priority_char.append(c)
            elif f == '-' and c not in self.priority_char: # Gray
                try:
                    self.char_bank.remove(c)
                except KeyError: # for duplicate absent letters
                    pass
    
## commenting out word bank removal since it seems to converge to qquickly
    def next_guess(self):
        candidates = []
        for w in word_bank:
            # Make sure no "gray"  letters
#             print(113, w)
            if not has_valid_char(w, self.char_bank):
                if w == 'crimp':
                    print('çorrect word removed', self.char_bank)
                self.word_bank.remove(w)
                continue
            if not is_unordered_sublist(self.priority_char, w):
                self.word_bank.remove(w)
                continue
            if self.optimal != self.null_opt:
                # compare these two variables to decide if candidate
                fixed_char_count = 0
                w_sim_count = 0 
                for ix, c in enumerate(self.optimal):
                    if c == '+':
                        fixed_char_count += 1
                        if w[ix] == c:
                            w_sim_count += 1
                if fixed_char_count == w_sim_count:
                    candidates.append(w)
                else:
                    self.word_bank.remove(w)
        print(137, len(candidates))
        if len(candidates) == 0:
            print(self.word_bank)
        return candidates[np.argmax([entropy(w) for w in candidates])] 

In [3]:
word = 'crimp'
x = Wordle(word)
x.verify('filni')

F I L N I 

- ! - - - 

['-', '!', '-', '-', '-']

# algorithm

1. start with n-letter word with highest entropy
2. if green, fix position
3. if yellow, switch position

Data science questions:
- are there any words for which it is very hard for this algorithm (or human) to guess within 6 tries?

In [74]:
# word_bank = [w for w in allwords if len(w) == len(word)]
# np.random.shuffle(word_bank) # enables starting with random choice from all high entropy words
# guess = word_bank[np.argmax([entropy(w) for w in word_bank])] #start guess

word = 'crimp'
wordle = Wordle(word)
inferer = WordleInference(len(word), word_bank)
for t in range(10):
    print('\nguess', t+1)
    flags = wordle.verify(guess)
    if wordle.solved:
        print(f'solved word "{guess}" in {t+1} steps')
        break

    inferer.update_priors(guess, flags)
    guess = inferer.next_guess()


guess 1
B A T H S 

- - - - - 137 545

guess 2
M O L D Y 

! - - - - 137 12

guess 3
G R I M E 

- + + + - 137 2

guess 4
P U R I M 

! - ! ! ! 137 0
['waldo', 'edges', 'bombs', 'polyp', 'overt', 'peeve', 'croak', 'chins', 'graph', 'leads', 'poole', 'boozy', 'slued', 'prick', 'merge', 'loots', 'swath', 'renee', 'gimps', 'leaks', 'evict', 'flint', 'horns', 'sauna', 'plies', 'sowed', 'firth', 'holes', 'ellis', 'loved', 'sakes', 'defoe', 'daces', 'silts', 'salus', 'birch', 'least', 'rigor', 'drill', 'stone', 'clasp', 'fetch', 'korea', 'lulls', 'ditty', 'yolky', 'point', 'limey', 'saves', 'joint', 'based', 'guilt', 'clack', 'canoe', 'chair', 'navvy', 'junks', 'basil', 'wicks', 'brake', 'nouns', 'gouda', 'upend', 'basks', 'lumen', 'guile', 'viler', 'folly', 'enure', 'moist', 'unsex', 'noted', 'ruing', 'mises', 'aging', 'sooty', 'plant', 'recta', 'voids', 'mimed', 'tzars', 'cairn', 'lotus', 'vying', 'dwyer', 'zaire', 'foggy', 'talky', 'canny', 'buggy', 'solar', 'cacti', 'mince', 'mooed', 'e

ValueError: attempt to get argmax of an empty sequence