In [1]:
import numpy as np
import pandas as pd

In [24]:
# get the list of words from Knuth's Graph Base list of five-letter words
# five = pd.read_csv("sgb-words.txt",header=None,names=["word"])

# This source uses a larger pool of almost 13,000 words
# ignore_index=True gives the combined frame a unique 0..n-1 index; without it
# the row labels of the two files overlap, so a label no longer identifies a
# single word and label-based lookups can match the wrong rows.
five = pd.concat(
    [
        pd.read_csv("wordle-answers-alphabetical.txt", header=None, names=["word"]),
        pd.read_csv("wordle-allowed-guesses.txt", header=None, names=["word"]),
    ],
    axis=0,
    ignore_index=True,
)

In [25]:
# preview the first ten rows of the combined word list
five.head(10)

Unnamed: 0,word
0,aback
1,abase
2,abate
3,abbey
4,abbot
5,abhor
6,abide
7,abled
8,abode
9,abort


In [3]:
def wordle(answer,guess):
    """Score a five-letter `guess` against a five-letter `answer`.

    Returns a five-character string with one mark per guess position:
    "G" when the letter matches the answer at that position, "Y" when the
    letter occurs in the answer at some other not-yet-accounted-for
    position, and "B" otherwise.  Repeated letters earn at most as many
    "Y" marks as the answer has unmatched copies, assigned left to right.
    """
    marks = ["B"] * 5
    # answer letters that were not matched exactly, with their multiplicity
    unmatched = {}
    for pos in range(5):
        if answer[pos] == guess[pos]:
            marks[pos] = "G"
        else:
            unmatched[answer[pos]] = unmatched.get(answer[pos], 0) + 1
    for pos in range(5):
        if marks[pos] == "G":
            continue
        letter = guess[pos]
        # spend one unmatched copy of the letter for every "Y" awarded
        if unmatched.get(letter, 0) > 0:
            marks[pos] = "Y"
            unmatched[letter] -= 1
    return "".join(marks)

In [4]:
# sanity check: score the guess "rhyme" against the answer "ready"
answer, guess = "ready", "rhyme"
wordle(answer, guess)

'GBYBY'

In [5]:
def cypher(df,key):
    """Return the Wordle response for every word in `df` when `key` is guessed.

    Each entry of df["word"] is treated as the hidden answer (the
    "plaintext") and `key` as the guess; the resulting Series of response
    strings (the "cypher text") is named "cypher" and keeps the index of
    df["word"].
    """
    def score(word):
        return wordle(word, key)

    responses = df["word"].apply(score)
    return responses.rename("cypher")

In [6]:
# score every word in the list against a single guess and show the first rows
key = "ready"
print("Using the guess:", key)
pd.concat([five["word"], cypher(five, key)], axis="columns").head(10)

Using the guess: ready


Unnamed: 0,word,cypher
0,aahed,BYYYB
1,aalii,BBYBB
2,aargh,YBYBB
3,aarti,YBYBB
4,abaca,BBGBB
5,abaci,BBGBB
6,abacs,BBGBB
7,abaft,BBGBB
8,abaka,BBGBB
9,abamp,BBGBB


In [7]:
def entropy(series):
    """Information entropy of a sequence of categorical values.

    The relative frequency of each distinct value is used as its
    probability and -sum(p * log(p)) is returned, computed with the natural
    logarithm.  An empty input yields 0.
    """
    total = len(series)
    # occurrences of each distinct value, in order of first appearance
    counts = {}
    for value in series:
        counts[value] = counts.get(value, 0) + 1
    result = 0
    for count in counts.values():
        prob = count / total
        if prob > 0:
            result -= prob * np.log(prob)
    return result

In [8]:
# compute the entropy (natural log, i.e. in nats) of the distribution of
# responses that a given guess produces across every word in the list
guess="tares"
entropy(cypher(five,guess))

4.267821571562566

In [40]:
def nextWord(df,allWords):
    """Choose the next guess: the word whose responses carry the most entropy.

    Parameters
    ----------
    df : DataFrame
        Remaining candidate answers, in a "word" column.
    allWords : iterable of str
        Every word that may be used as a guess (candidates and non-candidates).

    Returns
    -------
    str
        The word from `allWords` with the highest score, or "" when no word
        scores above zero (for example when `allWords` is empty).

    A guess that is itself still a candidate gets a small positive boost,
    -(1-1/n)*log(1-1/n) with n the number of candidates, so that it is
    preferred over a non-candidate of similar entropy.
    """
    n=len(df.word)
    # Candidate membership is decided by the word itself.  Comparing the loop
    # position with df's index labels is wrong whenever labels and positions
    # disagree (e.g. a concatenated frame that kept its overlapping labels).
    candidates=set(df.word)
    # The boost is loop-invariant.  For n <= 1 the formula degenerates
    # (0*log(0), or a division by zero), so no boost is applied there.
    boost=-(1-1/n)*np.log(1-1/n) if n>1 else 0.0
    ans=""
    m=0
    for word in allWords:
        e=entropy(cypher(df,word))
        if word in candidates:
            e+=boost
        if e>m:
            m=e
            ans=word
    return ans

In [10]:
def getResponse():
    """Prompt until the user enters a valid Wordle response, e.g. "BYYBG".

    Input is upper-cased before validation.  An empty line returns ""
    immediately; otherwise the prompt repeats until the entry is exactly
    five characters long and uses only 'B', 'Y' and 'G', and that entry is
    returned.
    """
    allowed = {"B", "G", "Y"}
    while True:
        print("Enter response: ")
        reply = input().upper()
        if not reply:
            return ""
        if len(reply) != 5:
            print("Response must be exactly 5 characters")
            continue
        if all(ch in allowed for ch in reply):
            return reply
        print("Enter Wordle response using 'B','Y','G'")

In [11]:
def solver(df, first_guess="tares", all_words=None):
    """Interactively solve a Wordle puzzle by repeatedly proposing a guess.

    Each round prints the current guess, reads the user's Wordle response
    with getResponse(), keeps only the rows of `df` consistent with that
    response, and asks nextWord() for the following guess.  The loop ends
    when the user enters an empty response or "GGGGG", or when one or zero
    candidate words remain.

    Parameters
    ----------
    df : DataFrame
        Candidate answers, in a "word" column.
    first_guess : str, optional
        Opening guess; defaults to "tares".
    all_words : iterable of str, optional
        Pool of words nextWord() may propose.  Defaults to the notebook-level
        `five.word`.
    """
    if all_words is None:
        all_words = five.word
    guess = first_guess
    while True:
        print("Next guess:")
        print("\t",guess)
        sig = getResponse()
        if sig == "":
            break
        if sig == "GGGGG":
            print("Then that's the word!")
            break
        # keep only the words that would have produced this response
        df = df[cypher(df,guess) == sig]
        if df.shape[0] == 1:
            print("Then the word must be:")
            print("\t",df.word.iloc[0])
            break
        if df.shape[0] == 0:
            print("No such word was found.")
            break
        guess = nextWord(df,all_words)

In [41]:
# run the interactive solver over the full word list (prompts for each Wordle response)
solver(five)

Next guess:
	 tares
Enter response: 
bbbbb
Next guess:
	 colin
Enter response: 
byyby
Next guess:
	 gopak
Enter response: 
bybby
Then the word must be:
	 knoll
