In [1]:
# import libraries
import pandas as pd
import re
#from english_words import english_words_set
from wordfreq import zipf_frequency # count the usage frequency of words

In [2]:
# wordle word pool obtained from: https://gist.github.com/cfreshman

# before nyt bought wordle
guesses_org = pd.read_csv('wordle-allowed-guesses.txt', header=None)
answers_org = pd.read_csv('wordle-answers-alphabetical.txt', header=None)

# after nyt bought wordle
guesses_nyt = pd.read_csv('wordle-nyt-allowed-guesses.txt', header=None)
answers_nyt = pd.read_csv('wordle-nyt-answers-alphabetical.txt', header=None)

In [3]:
# combine the allowed guesses and answers to create the complete allowed word list
word_pool_org = guesses_org[0].to_list() + answers_org[0].to_list()
word_pool_nyt = guesses_nyt[0].to_list() + answers_nyt[0].to_list()

In [4]:
print(len(word_pool_org))
print(len(word_pool_nyt))

12972
12947


In [5]:
# count the usage frequency of words in word_list using the wordfreq library
# more information about the library: https://pypi.org/project/wordfreq/
def create_usage_freq_d(word_list):    
    usage_freq_d = {}
    for word in word_list:
        usage_freq_d[word] = zipf_frequency(word, 'en')
        
    # sort the dict by the usage frequency score in desc order
    sorted_usage_freq_d = {k: v for k, v in reversed(sorted(usage_freq_d.items(), key=lambda item: item[1]))}
    return sorted_usage_freq_d

In [6]:
# this function is not used
def create_letter_score_d(word_list):
    letters_d = {}
    for letter in ''.join(word_list):
        if letter in letters_d:
            letters_d[letter] += 1
        else:
            letters_d[letter] = 1
        
    freq_d = {key: 0 for key in word_list}
    for word in word_list:
        unique_letter_word = "".join(set(word))
        for n in unique_letter_word:
            freq_d[word] += letters_d[n]
        
    sorted_freq_d = {k: v for k, v in reversed(sorted(freq_d.items(), key=lambda item: item[1]))}

In [7]:
def get_pattern(guess, answer):
    c = 0
    pattern = ''
    while c < 5:
        if guess[c] == answer[c]:
            pattern+='g'
        elif guess[c] in answer:
            pattern+='y'
        else:
            pattern+='b'
        c+=1
    return pattern

In [8]:
# function to update the word list based on the green/yellow/black feedback after each guess
# function returns an updated and usage frequency sorted word list
def update_pool(pool, guess, pattern):
    pattern_dict = {}
    for n in range(5):
        if list(guess)[n] not in pattern_dict:
            pattern_dict[list(guess)[n]] = list(pattern)[n]
        elif list(guess)[n]+'1' in pattern_dict:
            pattern_dict[list(guess)[n]+'2'] = list(pattern)[n]
        else:
            pattern_dict[list(guess)[n]+'1'] = list(pattern)[n]
        
    b_list = []
    for k, v in pattern_dict.items():
        if v =='b':
            b_list.append(k[0])
            
    g_list = []
    for k, v in pattern_dict.items():
        if v =='g':
            g_list.append(k[0])
            
    b_list = [letter for letter in b_list if letter not in g_list]
            
    #print('Black List:')
    #print(b_list)
    #print("")
    
    if len(b_list) != 0:
        for b_letter in b_list:
            pool = [word for word in pool if b_letter not in word]
            
    #print('After Black:')
    #print(pool)
    #print("")
            
    g_pattern = ''
    for k, v in pattern_dict.items():
        if v =='g':
            g_pattern+=k[0]
        else:
            g_pattern+='.'
    p = re.compile(g_pattern)
    pool = [word for word in pool if p.match(word)]
    
    #print("After Green:")
    #print(pool)
    #print("")
    
    count = 0
    y_dict = {}
    for k, v in pattern_dict.items():
        if v =='y':
            y_dict[k] = count
        count+=1
    
    g_list = []
    for k, v in pattern_dict.items():
        if v =='g':
            g_list.append(k[0])

    remove_list = []
    for k, v in y_dict.items():
        for word in pool:
            if (word.find(k[0]) == v):
                remove_list.append(word)
    
    pool = [word for word in pool if word not in remove_list]
    
    y_list = [y[0] for y in list(y_dict.keys())]
    if len(y_list) != 0:
        for y_letter in y_list:
            pool = [word for word in pool if y_letter in word]
    
    try:
        pool.remove(guess)
    except:
        pass
        
    # select the pairs in updated word list from the word usage frequency dictionary
    freq_d = {word_key: usage_freq_d[word_key] for word_key in pool}
    sorted_freq_d = {k: v for k, v in reversed(sorted(freq_d.items(), key=lambda item: item[1]))}
    return sorted_freq_d
    #return list(sorted_freq_d.keys())

In [9]:
usage_freq_d = create_usage_freq_d(word_pool_nyt)

In [10]:
# check the word usage frequency dictionary
list(usage_freq_d.items())[:10]

[('about', 6.4),
 ('their', 6.33),
 ('there', 6.31),
 ('which', 6.3),
 ('would', 6.27),
 ('other', 6.16),
 ('first', 6.11),
 ('after', 6.11),
 ('think', 6.08),
 ('could', 6.06)]

## Single trial

In [11]:
#test how many guesses the function need to use to solve one puzzle

sample_answer = "nymph"
guess = 'roate' # set the starting word whatever you like
guess_count = 1
word_pool = word_pool_nyt

while guess != sample_answer:
    print('Guess count:', guess_count)
    print('Guess:', guess)
    print('Feedback:', get_pattern(guess, sample_answer))
    print('')
    word_pool = update_pool(word_pool, guess, get_pattern(guess, sample_answer))
    #print(word_pool)
    guess = list(update_pool(word_pool, guess, get_pattern(guess, sample_answer)).keys())[0]
    guess_count+=1
    
print('Guess count:', guess_count)
print('Guess:', guess)
print('Feedback:', get_pattern(guess, sample_answer))
print('')

Guess count: 1
Guess: roate
Feedback: bbbbb

Guess count: 2
Guess: which
Feedback: bybbg

Guess count: 3
Guess: flush
Feedback: bbbbg

Guess count: 4
Guess: nymph
Feedback: ggggg



## Helper

In [59]:
# actually use the function to solve puzzle, without the answer
new_pool = update_pool(word_pool_nyt, 'roate', 'gbbby')
new_pool

{'rules': 5.06,
 'river': 5.03,
 'reply': 4.5,
 'refer': 4.45,
 'ruled': 4.3,
 'rebel': 4.08,
 'reign': 4.07,
 'rises': 4.07,
 'rides': 4.06,
 'rider': 4.02,
 'remix': 3.99,
 'riley': 3.9,
 'risen': 3.86,
 'ruler': 3.84,
 'renew': 3.73,
 'resin': 3.61,
 'reuse': 3.37,
 'relic': 3.37,
 'reefs': 3.36,
 'reins': 3.31,
 'repel': 3.2,
 'reels': 3.12,
 'revel': 3.11,
 'reddy': 3.08,
 'revue': 3.07,
 'rupee': 3.04,
 'reeds': 3.02,
 'reeve': 2.91,
 'rerun': 2.9,
 'runes': 2.89,
 'riser': 2.88,
 'riled': 2.84,
 'reeks': 2.81,
 'redux': 2.74,
 'riven': 2.72,
 'recur': 2.65,
 'ripen': 2.64,
 'reiki': 2.59,
 'reedy': 2.56,
 'rebus': 2.53,
 'rimes': 2.4,
 'rebbe': 2.35,
 'rumen': 2.3,
 'redid': 2.29,
 'rives': 2.23,
 'rubes': 2.22,
 'recce': 2.22,
 'riles': 2.21,
 'revie': 2.16,
 'ruder': 2.13,
 'renin': 2.09,
 'rubel': 1.96,
 'ribes': 1.96,
 'riper': 1.95,
 'rices': 1.89,
 'repin': 1.87,
 'ruses': 1.84,
 'rebuy': 1.84,
 'ricer': 1.8,
 'rheum': 1.79,
 'renne': 1.79,
 'rimer': 1.72,
 'regie': 1.66,


In [60]:
new_pool_1 = update_pool(new_pool, 'rules', 'gbbgb')
new_pool_1

{'river': 5.03,
 'refer': 4.45,
 'rider': 4.02,
 'renew': 3.73,
 'riven': 2.72,
 'ripen': 2.64,
 'riper': 1.95,
 'ricer': 1.8,
 'rimer': 1.72,
 'riced': 1.43,
 'rebec': 1.29,
 'riped': 1.22,
 'rived': 1.21,
 'rekey': 1.14,
 'remen': 1.14,
 'ricey': 1.03,
 'rimed': 1.03,
 'reded': 0.0,
 'refed': 0.0,
 'rehem': 0.0,
 'reked': 0.0,
 'remex': 0.0,
 'reney': 0.0,
 'repeg': 0.0,
 'rewed': 0.0,
 'rifer': 0.0,
 'ryked': 0.0,
 'ryper': 0.0}

In [61]:
new_pool_2 = update_pool(new_pool_1, 'river', 'gbbgb')
new_pool_2

{'refer': 4.45,
 'renew': 3.73,
 'rebec': 1.29,
 'remen': 1.14,
 'rekey': 1.14,
 'ryper': 0.0,
 'ryked': 0.0,
 'rewed': 0.0,
 'repeg': 0.0,
 'reney': 0.0,
 'remex': 0.0,
 'reked': 0.0,
 'rehem': 0.0,
 'refed': 0.0,
 'reded': 0.0}

In [62]:
new_pool_3 = update_pool(new_pool_1, 'refer', 'ggbgb')
new_pool_3

{'renew': 3.73,
 'rebec': 1.29,
 'remen': 1.14,
 'rekey': 1.14,
 'rewed': 0.0,
 'repeg': 0.0,
 'reney': 0.0,
 'remex': 0.0,
 'reked': 0.0,
 'rehem': 0.0,
 'reded': 0.0}

## Test with all words

In [80]:
%%time
#test how many guesses the function need to use to solve all the puzzles

def guessing(first_guess, answer_list, fail=0):
    
    if type(answer_list) == str:
        word_pool = word_pool_nyt
        guess_count = 1
        guess = first_guess
        while guess != answer:
            word_pool = update_pool(word_pool, guess, get_pattern(guess, answer))
            guess = list(update_pool(word_pool, guess, get_pattern(guess, answer)).keys())[0]
            guess_count+=1
        return guess_count
    
    else:
        guesses_list = []
        for answer in answer_list:
            word_pool = word_pool_nyt
            guess_count = 1
            guess = first_guess
            while guess != answer:
                word_pool = update_pool(word_pool, guess, get_pattern(guess, answer))
                guess = list(update_pool(word_pool, guess, get_pattern(guess, answer)).keys())[0]
                guess_count+=1
            guesses_list.append(guess_count)
            #print(guess, answer, guess_count)
        if fail == 1:
            fail_count = sum(guess > 6 for guess in guesses_list)
            guesses_list = [guess for guess in guesses_list if guess <= 6]
            return sum(guesses_list)/len(guesses_list), len(guesses_list)/(len(guesses_list)+fail_count)
        else:
            return sum(guesses_list)/len(guesses_list)

print("Average guess used, success percentage(solve within 6 guesses)")
guessing('plate', answers_nyt[0].to_list(), 1)

aback aback 4
abase abase 5
abate abate 5
abbey abbey 4
abbot abbot 3
abhor abhor 6
abide abide 5
abled abled 5
abode abode 4
abort abort 3
about about 2
above above 3
abuse abuse 4
abyss abyss 5
acorn acorn 4
acrid acrid 4
actor actor 3
acute acute 3
adage adage 4
adapt adapt 3
adept adept 3
admin admin 3
admit admit 3
adobe adobe 3
adopt adopt 3
adore adore 5
adorn adorn 5
adult adult 3
affix affix 4
afire afire 7
afoot afoot 3
afoul afoul 3
after after 2
again again 2
agape agape 4
agate agate 4
agent agent 3
agile agile 4
aging aging 3
aglow aglow 4
agony agony 3
agree agree 3
ahead ahead 3
aider aider 5
aisle aisle 4
alarm alarm 4
album album 3
alert alert 2
algae algae 4
alibi alibi 4
alien alien 3
align align 3
alike alike 4
alive alive 3
allay allay 5
alley alley 4
allot allot 4
allow allow 3
alloy alloy 4
aloft aloft 4
alone alone 2
along along 2
aloof aloof 5
aloud aloud 3
alpha alpha 2
altar altar 3
alter alter 3
amass amass 5
amaze amaze 4
amber amber 3
amble amble 5
amend 

dread dread 4
dream dream 4
dress dress 4
dried dried 7
drier drier 6
drift drift 3
drill drill 3
drink drink 4
drive drive 4
droit droit 4
droll droll 4
drone drone 6
drool drool 4
droop droop 3
dross dross 5
drove drove 5
drown drown 6
druid druid 5
drunk drunk 4
dryer dryer 5
dryly dryly 4
duchy duchy 3
dully dully 4
dummy dummy 5
dumpy dumpy 5
dunce dunce 6
dusky dusky 6
dusty dusty 4
dutch dutch 4
duvet duvet 5
dwarf dwarf 4
dwell dwell 5
dwelt dwelt 5
dying dying 6
eager eager 4
eagle eagle 3
early early 2
earth earth 2
easel easel 3
eaten eaten 4
eater eater 6
ebony ebony 4
eclat eclat 4
edict edict 4
edify edify 5
eerie eerie 6
egret egret 5
eight eight 3
eject eject 4
eking eking 5
elate elate 3
elbow elbow 3
elder elder 3
elect elect 3
elegy elegy 4
elfin elfin 4
elide elide 4
elite elite 2
elope elope 3
elude elude 4
email email 3
embed embed 5
ember ember 7
emcee emcee 4
empty empty 2
enact enact 4
endow endow 4
enema enema 5
enemy enemy 5
enjoy enjoy 5
ennui ennui 5
ensue 

macro macro 4
madam madam 5
madly madly 6
mafia mafia 5
magic magic 3
magma magma 5
maize maize 3
major major 3
maker maker 3
mambo mambo 5
mamma mamma 6
mammy mammy 7
manga manga 4
mange mange 7
mango mango 4
mangy mangy 5
mania mania 4
manic manic 6
manly manly 5
manor manor 6
maple maple 3
march march 4
marry marry 6
marsh marsh 5
mason mason 4
masse masse 4
match match 4
matey matey 5
mauve mauve 6
maxim maxim 6
maybe maybe 2
mayor mayor 4
mealy mealy 5
meant meant 3
meaty meaty 4
mecca mecca 5
medal medal 4
media media 3
medic medic 4
melee melee 5
melon melon 4
mercy mercy 6
merge merge 5
merit merit 3
merry merry 8
metal metal 3
meter meter 5
metro metro 4
micro micro 4
midge midge 5
midst midst 3
might might 3
milky milky 5
mimic mimic 4
mince mince 4
miner miner 5
minim minim 6
minor minor 4
minty minty 7
minus minus 5
mirth mirth 6
miser miser 7
missy missy 5
mocha mocha 5
modal modal 4
model model 3
modem modem 6
mogul mogul 4
moist moist 4
molar molar 4
moldy moldy 4
money 

skirt skirt 4
skulk skulk 4
skull skull 3
skunk skunk 4
slack slack 3
slain slain 6
slang slang 4
slant slant 3
slash slash 5
slate slate 2
sleek sleek 4
sleep sleep 2
sleet sleet 3
slept slept 2
slice slice 3
slick slick 4
slide slide 3
slime slime 4
slimy slimy 5
sling sling 4
slink slink 4
sloop sloop 4
slope slope 2
slosh slosh 6
sloth sloth 3
slump slump 3
slung slung 8
slunk slunk 5
slurp slurp 4
slush slush 5
slyly slyly 5
smack smack 5
small small 2
smart smart 3
smash smash 4
smear smear 5
smell smell 4
smelt smelt 4
smile smile 3
smirk smirk 5
smite smite 4
smith smith 3
smock smock 5
smoke smoke 4
smoky smoky 5
smote smote 5
snack snack 4
snail snail 3
snake snake 3
snaky snaky 5
snare snare 4
snarl snarl 4
sneak sneak 4
sneer sneer 4
snide snide 5
sniff sniff 4
snipe snipe 4
snoop snoop 4
snore snore 4
snort snort 4
snout snout 6
snowy snowy 4
snuck snuck 4
snuff snuff 4
soapy soapy 5
sober sober 6
soggy soggy 5
solar solar 3
solid solid 3
solve solve 4
sonar sonar 4
sonic 

(4.026749888542131, 0.97141619748809)