In [1]:
import numpy as np
import pandas as pd
import time

In [2]:
five = pd.concat([
    pd.read_csv("wordle-answers-alphabetical.txt",header=None,names=["word"]),
    pd.read_csv("wordle-allowed-guesses.txt",header=None,names=["word"])],axis=0,ignore_index=True
)
answers = pd.read_csv("wordle-answers-alphabetical.txt",header=None,names=["word"])

In [4]:
def wordle(answer,guess):
    sig=["B","B","B","B","B"]
    for i in range(5):
        if answer[i]==guess[i]:
            sig[i]="G"
    # create a count of remaining unidentified chars, ignoring the "G" positions
    dic={}
    for i in range(5):
        if sig[i]!="G":
            if answer[i] not in dic:
                dic[answer[i]]=1
            else:
                dic[answer[i]]+=1
    for i in range(5):
        if sig[i]!="G" and guess[i] in dic:
            if dic[guess[i]]>0:
                sig[i]="Y"
                dic[guess[i]]-=1
    ans=""
    for char in sig:
        ans+=char
    return ans

In [6]:
# determine the output cypher text for each plaintext word given a key
def cypher(words,key):
    return words.apply(lambda x: wordle(x,key)).rename("cypher")

In [8]:
# given a series of categorical values compute the information entropy
def entropy(series):
    dic={}
    n=len(series)
    for el in series:
        if el not in dic:
            dic[el]=1
        else:
            dic[el]+=1
    e=0
    for key in dic:
        p=dic[key]/n
        if p>0:
            e -= p*np.log(p)
    return e

In [9]:
# compute the entropy of the responses as a probability distribution for a given guess
guess="tares"
entropy(cypher(five.word,guess))

4.2933900573740065

In [10]:
# Determine the best next guess by choosing the word which maximizes information entropy
# considers all words as potential candidates.
def nextWord(possibleWords,allWords):
    n=len(possibleWords)
    ans=""
    m=0
    for i in range(len(allWords)):
        word=allWords.iloc[i]
        series=cypher(possibleWords,word)
        e=entropy(series)
        # apply a boost for words which are still candiates
        # boost is positive since np.log(1-1/n)<0
        if i in possibleWords.index:
            e-=(1-1/n)*np.log(1-1/n)
        if e>m:
            m=e
            ans=word
    return ans

In [24]:
startTime=time.time()
answers = pd.read_csv("wordle-answers-alphabetical.txt",header=None,names=["word"])
# initial guess
file=open('wordle-guesses.txt','w')
distribution=[0 for i in range(7)]
num=0
for answer in answers.word:
    df=answers
    guess="soare"
    file.write(guess) # write initial guess to file
    ct=1 # initialize guess count at 1
    while guess!=answer:
        response=wordle(answer,guess)
        df=df[cypher(df.word,guess)==response] #filter possible words
        if df.shape[0]==1:
            guess=df.word.iloc[0]
        else:
            guess=nextWord(df.word,five.word)
        file.write(',')
        file.write(guess) # write next guess to file, stop when guess==answer
        ct+=1
    file.write('\n')
    if ct>6:
        distribution[6]+=1
    else:
        distribution[ct-1]+=1
    num+=1
    if num%100==0:
        print("Finished",num,"words in",round((time.time() - startTime)/60,2),"minutes")
file.close()
print(np.average(distribution))
print(distribution)

Finished 100 words in 8.04 minutes
Finished 200 words in 16.88 minutes
Finished 300 words in 26.15 minutes
Finished 400 words in 34.97 minutes
Finished 500 words in 42.92 minutes
Finished 600 words in 52.76 minutes
Finished 700 words in 62.01 minutes
Finished 800 words in 71.87 minutes
Finished 900 words in 81.0 minutes
Finished 1000 words in 89.78 minutes
Finished 1100 words in 99.85 minutes
Finished 1200 words in 109.63 minutes
Finished 1300 words in 118.84 minutes
Finished 1400 words in 126.51 minutes
Finished 1500 words in 136.34 minutes
Finished 1600 words in 145.12 minutes
Finished 1700 words in 150.8 minutes
Finished 1800 words in 157.18 minutes
Finished 1900 words in 163.24 minutes
Finished 2000 words in 170.09 minutes
Finished 2100 words in 178.58 minutes
Finished 2200 words in 187.59 minutes
Finished 2300 words in 197.28 minutes
330.7142857142857
[0, 46, 1217, 987, 64, 1, 0]
