In [103]:
import os
import json
import random
import statistics as stats

Word list source (`sgb-words.txt`, the five-letter word list from Knuth's Stanford GraphBase): https://www-cs-faculty.stanford.edu/~knuth/sgb.html

In [61]:
# Load the word list (one word per line) and keep only the five-letter entries.
with open("sgb-words.txt", "r") as f:
    # The length check also drops any empty strings produced by the split
    # (for example after a trailing newline).
    words = [w for w in f.read().split("\n") if len(w) == 5]
# Sanity check: number of words loaded and a small sample.
print(len(words))
print(words[:10])

5757
['which', 'there', 'their', 'about', 'would', 'these', 'other', 'words', 'could', 'write']


In [67]:
def test_wordle_guess(guess, answer):
    print(guess, answer)
    
    green = {}
    yellow = set()
    red = set()
    bad_duplicates = {}
    good_duplicates = {}
    
    current_chars = []
    
    for i in range(5):
        q_char = guess[i]
        current_chars.append(q_char)
        
        if (q_char in bad_duplicates) or (q_char in red):
            # Letter already used and not present, skip
            continue
        
        if not q_char in answer:
            # Character is not in word
            print(f"Character not present: {q_char}")
            red.add(q_char)
            continue
            
        if answer[i] == q_char:
            # Character is present, and in correct place
            print(f"Character correctly placed: {q_char}, {i}")
            green[i] = q_char
            continue
            
        # Check whether it is a duplicate letter
        guess_count = current_chars.count(q_char)
        answer_count = answer.count(q_char)
        print(f"Guess count: {guess_count}, Answer count: {answer_count}")
        if answer_count < guess_count:
            # Letter is a duplicate, and the subsequent one is not in answer
            print(f"Bad duplicate letter: {q_char}, {guess_count}")
            bad_duplicates[q_char] = guess_count
            continue
        elif guess_count > 1:
            good_duplicates[q_char] = guess_count
            
        # Letter must be present in answer, but in wrong place.
        print(f"Letter incorrectly placed: {q_char}, {i}")
        yellow.add( (q_char, i) )
        continue
        
    return (green, yellow, red, bad_duplicates, good_duplicates)


def wordle_guess(guess, answer):
    """Work out what a Wordle guess reveals about the answer.

    Compares ``guess`` against ``answer`` letter by letter (the first five
    positions) and collects what can be learned from the comparison.

    Args:
        guess: The guessed word (five letters).
        answer: The word being guessed at (five letters).

    Returns:
        A tuple ``(green, yellow, red, bad_duplicates, good_duplicates)``:
        - green: dict mapping position -> letter that is correct at that position.
        - yellow: set of ``(letter, position)`` pairs; the letter is in the
          answer but not at that position.
        - red: set of letters that do not occur in the answer at all.
        - bad_duplicates: dict letter -> n, meaning the answer contains fewer
          than n copies of the letter.
        - good_duplicates: dict letter -> n, meaning the answer contains at
          least n copies of the letter.
    """
    green = {}
    yellow = set()
    red = set()
    bad_duplicates = {}
    good_duplicates = {}

    # Letters of the guess seen so far, used to count repeated letters.
    current_chars = []

    for i in range(5):
        q_char = guess[i]
        current_chars.append(q_char)

        if answer[i] == q_char:
            # Character is present, and in correct place.
            # This is checked first so that an exact match is never hidden by an
            # earlier occurrence of the same letter having been recorded as a
            # bad duplicate.
            green[i] = q_char
            continue

        if (q_char in bad_duplicates) or (q_char in red):
            # Letter already used and not present, skip
            continue

        if q_char not in answer:
            # Character is not in word
            red.add(q_char)
            continue

        # Check whether it is a duplicate letter
        guess_count = current_chars.count(q_char)
        answer_count = answer.count(q_char)
        if answer_count < guess_count:
            # Letter is a duplicate, and the subsequent one is not in answer
            bad_duplicates[q_char] = guess_count
            continue
        elif guess_count > 1:
            # The answer really does contain at least this many copies.
            good_duplicates[q_char] = guess_count

        # Letter must be present in answer, but in wrong place.
        yellow.add( (q_char, i) )

    return (green, yellow, red, bad_duplicates, good_duplicates)

In [68]:
# Exercise the verbose checker on a guess with a repeated letter ("nn") against an
# answer that contains that letter only once, then show the collected info tuple.
info = test_wordle_guess("henna", "banal")
print(info)

henna banal
Character not present: h
Character not present: e
Character correctly placed: n, 2
Guess count: 2, Answer count: 1
Bad duplicate letter: n, 2
Guess count: 1, Answer count: 2
Letter incorrectly placed: a, 4
({2: 'n'}, {('a', 4)}, {'h', 'e'}, {'n': 2}, {})


In [69]:
def test_filter_words(word_list, green, yellow, red, bad_dupes, good_dupes):
    fwl = word_list
    
    print("Green:", green)
    print("Yellow:", yellow)
    print("Red:", red)
    
    for i in green:
        print("green", green[i])
        fwl = [w for w in fwl if w[i]==green[i]]
        
    print(fwl)
        
    for tup in yellow:
        char = tup[0]
        loc = tup[1]
        fwl = [w for w in fwl if (char in w) and (w[loc]!=char)]
        
    print(fwl)
    
    for r in red:
        print("red:", r)
        fwl = [w for w in fwl if r not in w]
        
    print(fwl)
        
    for b in bad_dupes:
        print("bad_dupes:", b)
        fwl = [w for w in fwl if w.count(b) < bad_dupes[b]]
        
    print(fwl)
        
    for g in good_dupes:
        print("good_dupes:", g)
        fwl = [w for w in fwl if w.count(g) >= good_dupes[g]]
        
    print(fwl)
    
    
def filter_words(word_list, green, yellow, red, bad_dupes, good_dupes):
    """Keep only the words that are consistent with the given guess information.

    Args:
        word_list: Candidate words to filter.
        green: dict position -> letter that must be at that position.
        yellow: iterable of ``(letter, position)``; the letter must occur in
            the word but not at that position.
        red: iterable of letters that must not occur in the word.
        bad_dupes: dict letter -> n; the word must contain fewer than n copies.
        good_dupes: dict letter -> n; the word must contain at least n copies.

    Returns:
        The words satisfying every constraint, in their original order.
    """
    candidates = word_list

    # Exact-position matches.
    for position, letter in green.items():
        candidates = [word for word in candidates if word[position] == letter]

    # Letters that are present, but somewhere other than the guessed position.
    for entry in yellow:
        letter, position = entry[0], entry[1]
        candidates = [
            word for word in candidates
            if letter in word and word[position] != letter
        ]

    # Letters that are absent altogether.
    for letter in red:
        candidates = [word for word in candidates if letter not in word]

    # Upper bounds on repeated letters.
    for letter, too_many in bad_dupes.items():
        candidates = [word for word in candidates if word.count(letter) < too_many]

    # Lower bounds on repeated letters.
    for letter, at_least in good_dupes.items():
        candidates = [word for word in candidates if word.count(letter) >= at_least]

    return candidates

In [33]:
# Resume from a previous (possibly interrupted) run: reload any scores already saved.
# NOTE: JSON has no tuple type, so entries reloaded here are [word, score] lists
# while entries appended below are (word, score) tuples.
if os.path.isfile("guess_values.txt"):
    with open("guess_values.txt", "r") as f:
        results = json.load(f)
else:
    results = []

total = len(words)
# One entry is stored per word, in word-list order, so this is the next index to score.
start_point = len(results)

try:
    for i in range(start_point, total):
        # "\r" rewrites the same output line so this acts as a progress counter.
        print(f"\r{i} / {total}", end="")
        guess_results = []
        guess = words[i]

        # Try this opening guess against every possible answer and record how many
        # words would still be consistent with the resulting information.
        for j in range(total):
            answer = words[j]
            info = wordle_guess(guess, answer)
            guess_results.append(len(filter_words(words, *info)))

        # Score = mean number of remaining candidates over all answers.
        score = sum(guess_results) / len(guess_results)
        results.append( (guess, score) )
        
except KeyboardInterrupt:
    # Allow the long run to be stopped by hand; progress is saved in the finally block.
    pass
    
finally:
    # Always persist what has been computed so far, whether finished or interrupted.
    with open("guess_values.txt", "w+") as f:
        json.dump(results, f)
    print("\nCurrent values saved.")

5756 / 5757
Current values saved.


In [34]:
# Number of words scored so far; equals len(words) once the run above has completed.
len(results)

5757

In [35]:
# Rank opening guesses by score, ascending: fewest remaining candidates on average first.
sorted(results, key=lambda x: x[1])

[('tares', 132.53482716692722),
 ('rates', 135.8108389786347),
 ('aloes', 136.6463435817266),
 ('nares', 139.52110474205315),
 ('tales', 139.53152683689422),
 ('saner', 142.1780441202015),
 ('lores', 143.67569914886226),
 ('reals', 144.41323606044816),
 ('roles', 144.67795726941114),
 ('lanes', 148.23223901337502),
 ('tears', 151.53778009379886),
 ('riles', 152.89299982629842),
 ('earls', 153.78513114469342),
 ('cares', 155.11082160847664),
 ('nates', 156.03873545249263),
 ('laser', 156.0467257252041),
 ('dares', 157.30328295987493),
 ('raise', 157.40646169880145),
 ('races', 158.6119506687511),
 ('nears', 158.65537606392218),
 ('teals', 158.68316831683168),
 ('tires', 158.92635052978983),
 ('stoae', 159.11638005905854),
 ('tries', 160.36529442417927),
 ('pares', 160.42782699322564),
 ('slier', 160.70818134445022),
 ('stare', 162.0043425395171),
 ('dates', 163.25812054889698),
 ('hares', 163.48914365120723),
 ('dales', 163.70349140177174),
 ('mares', 164.43894389438944),
 ('rapes', 164

In [143]:
def random_word():
    """Return a pseudo-randomly selected entry of the global ``words`` list."""
    # Scale a float in [0, 1) up to the list length and truncate to get an index.
    index = int(random.random() * len(words))
    return words[index]


def get_word_scores(all_words, remaining_words, current_info=None):
    """Score every candidate guess by how well it narrows down the answer.

    For each word in ``all_words`` taken as the next guess, and each word in
    ``remaining_words`` taken as the true answer, the information from that
    guess is merged with ``current_info`` and used to filter
    ``remaining_words``. A guess's score is the mean number of words left
    over, so a lower score means a more informative guess.

    Args:
        all_words: Words to consider as the next guess.
        remaining_words: Words that could still be the answer.
        current_info: Information tuple accumulated from earlier guesses
            (as returned by ``wordle_guess`` / ``merge_guess_info``), or None.

    Returns:
        A list of ``(guess, score)`` tuples sorted by ascending score.

    Raises:
        ZeroDivisionError: if ``remaining_words`` is empty while ``all_words``
            is not, since there is nothing to average over.
    """
    results = []
    for assumed_guess in all_words:
        guess_results = []
        for assumed_answer in remaining_words:
            guess_info = wordle_guess(assumed_guess, assumed_answer)
            assumed_info = merge_guess_info(guess_info, current_info)
            # Unpack the five info fields as positional arguments. The previous
            # spelling, `(*assumed_info)`, is a SyntaxError on current Python.
            assumed_remaining_words = filter_words(remaining_words, *assumed_info)
            guess_results.append(len(assumed_remaining_words))

        score = sum(guess_results) / len(guess_results)
        results.append( (assumed_guess, score) )

    return sorted(results, key=lambda x: x[1])


def merge_guess_info(info1, info2):
    """Combine two guess-information tuples into one.

    Each argument is either None (meaning "no information") or a tuple
    ``(green, yellow, red, bad_dupes, good_dupes)`` as produced by
    ``wordle_guess``. Neither argument is modified; every container in the
    result is a new object.

    Merge rules:
        - green: union of the position -> letter mappings.
        - yellow / red: set union.
        - bad_dupes (answer has fewer than n copies): keep the smaller n,
          which is the tighter constraint.
        - good_dupes (answer has at least n copies): keep the larger n,
          which is the tighter constraint.

    Args:
        info1: First information tuple, or None.
        info2: Second information tuple, or None.

    Returns:
        The merged ``(green, yellow, red, bad_dupes, good_dupes)`` tuple.
    """
    # Build a fresh empty tuple per argument; yellow and red are sets, so the
    # set operations below work whichever argument is None.
    if info1 is None:
        info1 = ({}, set(), set(), {}, {})
    if info2 is None:
        info2 = ({}, set(), set(), {}, {})

    # Merge Greens
    green = {**info1[0], **info2[0]}

    # Merge Yellows
    yellow = set(info1[1]) | set(info2[1])

    # Merge Reds
    red = set(info1[2]) | set(info2[2])

    # Merge bad_dupes: copy first so the caller's dict is left untouched, then
    # keep the smallest upper bound seen for each letter.
    bad_dupes = dict(info1[3])
    for key, limit in info2[3].items():
        bad_dupes[key] = min(bad_dupes.get(key, limit), limit)

    # Merge good_dupes: copy first so the caller's dict is left untouched, then
    # keep the largest lower bound seen for each letter.
    good_dupes = dict(info1[4])
    for key, count in info2[4].items():
        good_dupes[key] = max(good_dupes.get(key, count), count)

    return (green, yellow, red, bad_dupes, good_dupes)
    
        

def work_out_word(answer, print_guesses=False):
    """Play a full game against ``answer`` and return the number of guesses used.

    Starts with the fixed opening guess "tares". After each guess the
    accumulated information is used to filter the candidate answers; when
    more than one candidate remains, the next guess is the best-scoring word
    from the full global ``words`` list according to ``get_word_scores``.

    Args:
        answer: The word to find. It is expected to be in the global ``words``
            list.
        print_guesses: If True, print each guess and a final summary.

    Returns:
        The total number of guesses made, including the final correct one.

    Raises:
        ValueError: if no word in ``words`` is consistent with the feedback,
            e.g. because ``answer`` is not in the word list.
    """
    guess = "tares"
    remaining_words = words
    guess_info = None
    guesses = []

    while True:
        if print_guesses:
            print(guess)
        guesses.append(guess)

        if guess == answer:
            # The guess just played was the answer. Return now so it is not
            # appended again as a separate "final guess" and counted twice.
            if print_guesses:
                print(f"Number of guesses: {len(guesses)}")
            return len(guesses)

        guess_info = merge_guess_info(wordle_guess(guess, answer), guess_info)
        remaining_words = filter_words(remaining_words, *guess_info)

        if not remaining_words:
            # Without this, get_word_scores would fail with a ZeroDivisionError.
            raise ValueError(f"No candidate words remain for answer {answer!r}")

        if len(remaining_words) == 1:
            # Only one candidate left: play it as the final guess.
            guesses.append(remaining_words[0])
            if print_guesses:
                print(f"Final Guess: {remaining_words[0]}")
                print(f"Number of guesses: {len(guesses)}")
            return len(guesses)
        else:
            # get_word_scores already returns its results sorted by ascending
            # score, so the first entry is the best next guess.
            word_scores = get_word_scores(words, remaining_words, guess_info)
            guess = word_scores[0][0]

#### Test 1

In [107]:
# Pick a random target word and apply the opening guess to it.
answer = random_word()
guess = "tares"
# No earlier information exists yet, hence the None.
guess_info = merge_guess_info(wordle_guess(guess, answer), None)
# Words from the full list that are still consistent with the feedback.
remaining_words = filter_words(words, *guess_info)
remaining_words

['steps',
 'cents',
 'items',
 'stems',
 'nests',
 'poets',
 'belts',
 'melts',
 'debts',
 'beets',
 'meets',
 'pests',
 'pelts',
 'diets',
 'exits',
 'sects',
 'vents',
 'fetus',
 'stews',
 'dents',
 'duets',
 'felts',
 'emits',
 'welts',
 'jests',
 'edits',
 'vests',
 'lefts',
 'zests',
 'bests',
 'hefts',
 'suets',
 'whets',
 'newts',
 'ethos',
 'gents',
 'bents',
 'stets',
 'beths',
 'wefts',
 'wests']

In [108]:
# Hard-mode scoring: only words that could still be the answer are considered as
# the next guess, so remaining_words is passed both as the guess pool and as the
# set of possible answers (get_word_scores takes all_words, remaining_words, info).
word_scores = get_word_scores(remaining_words, remaining_words, guess_info)
print(word_scores)
sorted(word_scores, key=lambda x: x[1])

[('nests', 7.7317073170731705), ('bents', 8.170731707317072), ('newts', 8.463414634146341), ('bests', 8.560975609756097), ('wests', 8.609756097560975), ('belts', 8.804878048780488), ('welts', 8.902439024390244), ('dents', 9.292682926829269), ('pests', 10.073170731707316), ('felts', 10.21951219512195), ('lefts', 10.21951219512195), ('wefts', 10.21951219512195), ('cents', 10.268292682926829), ('vents', 10.268292682926829), ('melts', 10.365853658536585), ('pelts', 10.463414634146341), ('vests', 10.658536585365853), ('debts', 10.902439024390244), ('gents', 11.097560975609756), ('beets', 11.195121951219512), ('jests', 11.487804878048781), ('zests', 11.487804878048781), ('diets', 12.365853658536585), ('whets', 12.609756097560975), ('beths', 12.902439024390244), ('hefts', 12.951219512195122), ('meets', 13.536585365853659), ('duets', 14.21951219512195), ('stews', 14.268292682926829), ('sects', 14.658536585365853), ('fetus', 14.658536585365853), ('items', 15.146341463414634), ('poets', 15.73170

[('nests', 7.7317073170731705),
 ('bents', 8.170731707317072),
 ('newts', 8.463414634146341),
 ('bests', 8.560975609756097),
 ('wests', 8.609756097560975),
 ('belts', 8.804878048780488),
 ('welts', 8.902439024390244),
 ('dents', 9.292682926829269),
 ('pests', 10.073170731707316),
 ('felts', 10.21951219512195),
 ('lefts', 10.21951219512195),
 ('wefts', 10.21951219512195),
 ('cents', 10.268292682926829),
 ('vents', 10.268292682926829),
 ('melts', 10.365853658536585),
 ('pelts', 10.463414634146341),
 ('vests', 10.658536585365853),
 ('debts', 10.902439024390244),
 ('gents', 11.097560975609756),
 ('beets', 11.195121951219512),
 ('jests', 11.487804878048781),
 ('zests', 11.487804878048781),
 ('diets', 12.365853658536585),
 ('whets', 12.609756097560975),
 ('beths', 12.902439024390244),
 ('hefts', 12.951219512195122),
 ('meets', 13.536585365853659),
 ('duets', 14.21951219512195),
 ('stews', 14.268292682926829),
 ('sects', 14.658536585365853),
 ('fetus', 14.658536585365853),
 ('items', 15.14634

In [109]:
# Play the best-scoring hard-mode word and fold its feedback into what is known.
guess2 = "nests"
# Note: the arguments here are (previous info, new info), the reverse of the order
# used in the other cells.
guess_info = merge_guess_info(guess_info, wordle_guess(guess2, answer))
remaining_words = filter_words(remaining_words, *guess_info)
remaining_words

['cents', 'vents', 'dents', 'gents', 'bents']

So if we only consider words that might be the answer (i.e. Hard Mode), we can end up in a situation where exactly one letter is still unknown and could be any of a handful of letters, with no way to eliminate them other than trying each one in turn (here: cents, vents, dents, gents, bents). This can easily take us over the 6-guess limit. Therefore, for every guess we need to check the expected value (the average number of candidate words left afterwards) of EVERY word in the full word list, not just the words that could still be the answer. This will presumably take about 4 seconds per iteration, but will give us a much better chance.

EDIT: Okay, it's a lot more than 4 seconds per iteration. This may not be entirely feasible...

EDIT 2: Okay, it's fine. A run only takes up to about a minute.

#### Test 2

In [113]:
# Restart from the opening guess against the same `answer` as Test 1.
guess = "tares"
# No earlier information exists yet, hence the None.
guess_info = merge_guess_info(wordle_guess(guess, answer), None)
remaining_words = filter_words(words, *guess_info)
remaining_words

['steps',
 'cents',
 'items',
 'stems',
 'nests',
 'poets',
 'belts',
 'melts',
 'debts',
 'beets',
 'meets',
 'pests',
 'pelts',
 'diets',
 'exits',
 'sects',
 'vents',
 'fetus',
 'stews',
 'dents',
 'duets',
 'felts',
 'emits',
 'welts',
 'jests',
 'edits',
 'vests',
 'lefts',
 'zests',
 'bests',
 'hefts',
 'suets',
 'whets',
 'newts',
 'ethos',
 'gents',
 'bents',
 'stets',
 'beths',
 'wefts',
 'wests']

In [126]:
# Score every word in the full list as the next guess, not only the remaining candidates.
word_scores = get_word_scores(words, remaining_words, guess_info)
sorted(word_scores, key=lambda x: x[1])

[('lento', 6.951219512195122),
 ('blend', 7.048780487804878),
 ('sense', 7.195121951219512),
 ('plein', 7.2926829268292686),
 ('fiend', 7.536585365853658),
 ('newel', 7.536585365853658),
 ('below', 7.634146341463414),
 ('meson', 7.634146341463414),
 ('nests', 7.7317073170731705),
 ('wield', 7.780487804878049),
 ('mesne', 7.975609756097561),
 ('lemon', 8.024390243902438),
 ('melon', 8.024390243902438),
 ('bents', 8.170731707317072),
 ('felon', 8.268292682926829),
 ('field', 8.317073170731707),
 ('bench', 8.414634146341463),
 ('lieth', 8.414634146341463),
 ('lends', 8.463414634146341),
 ('newts', 8.463414634146341),
 ('newly', 8.512195121951219),
 ('defun', 8.512195121951219),
 ('whelm', 8.512195121951219),
 ('swish', 8.560975609756097),
 ('belch', 8.560975609756097),
 ('bests', 8.560975609756097),
 ('welch', 8.560975609756097),
 ('wench', 8.609756097560975),
 ('wests', 8.609756097560975),
 ('liens', 8.658536585365853),
 ('bedew', 8.658536585365853),
 ('whelp', 8.75609756097561),
 ('belt

In [129]:
# Play the top-scoring word from the full list and narrow the candidates further.
guess2 = "lento"
guess_info = merge_guess_info(wordle_guess(guess2, answer), guess_info)
remaining_words = filter_words(remaining_words, *guess_info)
remaining_words

['cents', 'vents', 'dents', 'gents', 'bents']

In [130]:
# Re-score the full word list against the candidates that are left.
word_scores = get_word_scores(words, remaining_words, guess_info)
sorted(word_scores, key=lambda x: x[1])

[('budge', 1.4),
 ('badge', 1.4),
 ('caged', 1.4),
 ('cubed', 1.4),
 ('caved', 1.4),
 ('debug', 1.4),
 ('cadge', 1.4),
 ('gibed', 1.4),
 ('bodge', 1.4),
 ('vocab', 1.4),
 ('gyved', 1.4),
 ('could', 2.2),
 ('being', 2.2),
 ('began', 2.2),
 ('above', 2.2),
 ('given', 2.2),
 ('black', 2.2),
 ('voice', 2.2),
 ('lived', 2.2),
 ('bring', 2.2),
 ('birds', 2.2),
 ('moved', 2.2),
 ('doing', 2.2),
 ('cried', 2.2),
 ('begin', 2.2),
 ('child', 2.2),
 ('build', 2.2),
 ('blood', 2.2),
 ('gives', 2.2),
 ('board', 2.2),
 ('cover', 2.2),
 ('dance', 2.2),
 ('basic', 2.2),
 ('drive', 2.2),
 ('bread', 2.2),
 ('verbs', 2.2),
 ('based', 2.2),
 ('goods', 2.2),
 ('crowd', 2.2),
 ('loved', 2.2),
 ('drove', 2.2),
 ('cabin', 2.2),
 ('doubt', 2.2),
 ('guide', 2.2),
 ('block', 2.2),
 ('cloud', 2.2),
 ('broad', 2.2),
 ('beach', 2.2),
 ('saved', 2.2),
 ('avoid', 2.2),
 ('edges', 2.2),
 ('magic', 2.2),
 ('chord', 2.2),
 ('brave', 2.2),
 ('climb', 2.2),
 ('globe', 2.2),
 ('judge', 2.2),
 ('begun', 2.2),
 ('bound', 2.2

In [131]:
# Play one of the top-scoring words; it need not be a possible answer itself.
guess3 = "budge"
guess_info = merge_guess_info(wordle_guess(guess3, answer), guess_info)
remaining_words = filter_words(remaining_words, *guess_info)
remaining_words

['cents', 'vents']

In [132]:
# Re-score once more against the candidates that are left.
word_scores = get_word_scores(words, remaining_words, guess_info)
sorted(word_scores, key=lambda x: x[1])

[('which', 1.0),
 ('could', 1.0),
 ('place', 1.0),
 ('every', 1.0),
 ('never', 1.0),
 ('since', 1.0),
 ('above', 1.0),
 ('given', 1.0),
 ('music', 1.0),
 ('space', 1.0),
 ('black', 1.0),
 ('voice', 1.0),
 ('comes', 1.0),
 ('close', 1.0),
 ('lived', 1.0),
 ('vowel', 1.0),
 ('class', 1.0),
 ('piece', 1.0),
 ('river', 1.0),
 ('leave', 1.0),
 ('color', 1.0),
 ('moved', 1.0),
 ('heavy', 1.0),
 ('cried', 1.0),
 ('check', 1.0),
 ('watch', 1.0),
 ('carry', 1.0),
 ('clear', 1.0),
 ('child', 1.0),
 ('seven', 1.0),
 ('solve', 1.0),
 ('value', 1.0),
 ('gives', 1.0),
 ('lives', 1.0),
 ('force', 1.0),
 ('ocean', 1.0),
 ('scale', 1.0),
 ('cells', 1.0),
 ('rocks', 1.0),
 ('level', 1.0),
 ('reach', 1.0),
 ('catch', 1.0),
 ('cover', 1.0),
 ('waves', 1.0),
 ('dance', 1.0),
 ('cause', 1.0),
 ('basic', 1.0),
 ('drive', 1.0),
 ('local', 1.0),
 ('coast', 1.0),
 ('thick', 1.0),
 ('clean', 1.0),
 ('facts', 1.0),
 ('peace', 1.0),
 ('verbs', 1.0),
 ('visit', 1.0),
 ('chief', 1.0),
 ('cross', 1.0),
 ('cases', 1.0

#### Final Test

In [181]:
# This will possibly take up to 2 minutes to run
# End-to-end check: solve for a random word, printing each guess along the way.
answer = random_word()
work_out_word(answer, True)
print(f"Answer: {answer}")

tares
doily
begin
Final Guess: jingo
Number of guesses: 4
Answer: jingo


#### Real World Test

In [176]:
# First Guess
# "___ra" is not a real word: it is a stand-in answer chosen so that wordle_guess
# reports 'a' and 'r' as present but misplaced and 't', 'e', 's' as absent
# (presumably the feedback the real game gave for "tares").
guess_info = wordle_guess("tares","___ra")
print(guess_info)
remaining_words = filter_words(words, *guess_info)
print(len(remaining_words))
# Score the full word list and show the five best next guesses.
word_scores = get_word_scores(words, remaining_words, guess_info)
word_scores[:5]

({}, {('r', 2), ('a', 1)}, {'e', 's', 't'}, {}, {})
102


[('blond', 6.0588235294117645),
 ('bland', 6.411764705882353),
 ('crony', 6.450980392156863),
 ('bronc', 6.450980392156863),
 ('irony', 6.490196078431373)]

In [179]:
# Second Guess
# "___n_" is a stand-in answer: it marks 'n' as correct at index 3 and every other
# letter of "blond" as absent (presumably the feedback the real game gave).
guess_info = merge_guess_info(guess_info, wordle_guess("blond", "___n_"))
print(guess_info)
# Filters the full word list with the merged information from both guesses.
remaining_words = filter_words(words, *guess_info)
print(remaining_words)
word_scores = get_word_scores(words, remaining_words, guess_info)
word_scores[:5]

({3: 'n'}, {('r', 2), ('a', 1)}, {'d', 'l', 't', 'b', 'o', 's', 'e'}, {}, {})
['crank', 'frank', 'franc', 'prank']


[('force', 1.0),
 ('facts', 1.0),
 ('chief', 1.0),
 ('crops', 1.0),
 ('faces', 1.0)]

In [180]:
# Third guess
# "rc   " is a stand-in answer: it marks 'r' and 'c' as present but misplaced and
# 'f', 'o', 'e' as absent (presumably the feedback the real game gave for "force").
guess_info = merge_guess_info(guess_info, wordle_guess("force", "rc   "))
print(guess_info)
remaining_words = filter_words(remaining_words, *guess_info)
remaining_words

({3: 'n'}, {('r', 2), ('c', 3), ('a', 1)}, {'d', 'l', 't', 'f', 'b', 'o', 's', 'e'}, {}, {})


['crank']

CRANK was correct.