In [116]:
import numpy as np
import pandas as pd
from IPython.display import display

In [117]:
#Using a set for faster lookups than a numpy array
def loadGuessableWords(path="wordle_legal_words.txt"):
    """Load the set of words that are accepted as guesses.

    Args:
        path: text file with one word per line. Defaults to
            "wordle_legal_words.txt", resolved against the current
            working directory.

    Returns:
        A set of the words with surrounding whitespace stripped. Blank
        lines are skipped, so a trailing newline or an empty line in the
        file cannot add an empty-string "word" to the set.
    """
    with open(path, "r") as file:
        #strip each line first, then drop anything that stripped down to nothing
        words = set(word for word in (line.strip() for line in file) if word)
    return words

def loadAnswers(path="wordle_answers.txt"):
    """Load the set of words that can be the hidden answer.

    Args:
        path: text file with one answer per line. Defaults to
            "wordle_answers.txt", resolved against the current working
            directory.

    Returns:
        A set of the answers with surrounding whitespace stripped. Blank
        lines are skipped, so a trailing newline or an empty line in the
        file cannot add an empty-string "answer" to the set.
    """
    with open(path, "r") as file:
        #strip each line first, then drop anything that stripped down to nothing
        answers = set(answer for answer in (line.strip() for line in file) if answer)
    return answers

In [118]:
#return a list of the colour pattern for a given guess and answer: 0 is grey, 1 is yellow, 2 is green
def generateColourPattern(guess,answer) :
    """Score a guess against an answer using Wordle's colouring rules.

    Args:
        guess: the guessed word.
        answer: the hidden word; must have the same length as guess.

    Returns:
        A list with one int per letter of guess:
        0 is grey, 1 is yellow, 2 is green.

    Raises:
        ValueError: if guess and answer differ in length.
    """
    if len(guess) != len(answer) :
        raise ValueError("guess and answer must have the same length")

    pattern = [0] * len(guess)
    #answer letters that were NOT matched by a green, with multiplicity;
    #only these are available to be claimed as yellows
    unmatched = {}

    #check for green
    for i, (g, a) in enumerate(zip(guess, answer)) :
        if g == a :
            pattern[i] = 2
        else :
            unmatched[a] = unmatched.get(a, 0) + 1

    #check for yellow
    #yellows are handed out left to right, each one consuming an unmatched answer letter.
    #Greens never draw from this pool, so a green that appears earlier in the guess
    #cannot use up the yellow a later duplicate letter is entitled to.
    for i, g in enumerate(guess) :
        if pattern[i] == 0 and unmatched.get(g, 0) > 0 :
            pattern[i] = 1
            unmatched[g] -= 1

    return pattern

In [119]:
#testcases for generateColourPattern
#no repeated letters in play: the aligned 'l' is green, 'e' is elsewhere in the answer so yellow
assert [0,0,0,2,1] == generateColourPattern(guess="apple", answer="hello")
#identical words: every position is green
assert [2,2,2,2,2] == generateColourPattern(guess="hello", answer="hello")
#the answer's only 'l' is taken by the green at index 3, so the other 'l' stays grey
assert [0,1,0,2,0] == generateColourPattern(guess="hello", answer="apple")
#five copies of a letter the answer holds once: only the aligned copy is coloured
assert [0,0,0,2,0] == generateColourPattern(guess="lllll", answer="apple")
#answer has two 'l's: one goes green, leaving a single yellow for the first spare 'l'
assert [1,0,0,2,0] == generateColourPattern(guess="llple", answer="aaall")

In [132]:
#score each possible guess by the entropy of the colour-pattern distribution it produces across all possible answers
def calculateEntropy(distributionDict) :
    """Shannon entropy, in bits, of a distribution given as outcome counts.

    Args:
        distributionDict: dict mapping each outcome to the number of times
            it was observed. Only the values are used.

    Returns:
        The entropy -sum(p*log2(p)) over the outcomes, where p is the
        outcome's count divided by the total count. Returns 0 when there
        are no non-zero counts (empty dict or all counts zero).
    """
    #an outcome that was never observed contributes nothing (0*log2(0) is taken as 0);
    #dropping it here avoids 0*(-inf) = nan and a division by zero when every count is 0
    observations = [count for count in distributionDict.values() if count != 0]
    total = sum(observations)
    probabilities = (x/total for x in observations)
    entropy = -sum(p*np.log2(p) for p in probabilities)
    return entropy

def generateEntropyDict(possibleAnswers) :
    """Compute the colour-pattern entropy of every guessable word.

    For each word returned by loadGuessableWords(), the guess is scored
    against every word in possibleAnswers, the resulting colour patterns
    are tallied, and the entropy of that tally is stored for the guess.

    Args:
        possibleAnswers: collection of candidate answers. It is iterated
            once per guess, so it must support repeated iteration
            (e.g. a set or list).

    Returns:
        dict mapping each guess to its entropy.

    Progress is shown by updating a single IPython display handle.
    """
    progress = display('Starting...',display_id=True)
    entropyByGuess = {}
    for candidate in loadGuessableWords() :
        progress.update('Evaluating ' + candidate)
        tally = {}
        for target in possibleAnswers :
            #the pattern list is flattened to a string such as '01020' so it can be used as a dict key
            key = ''.join(map(str, generateColourPattern(candidate,target)))
            if key in tally :
                tally[key] += 1
            else :
                tally[key] = 1
        #adds onto any value already stored for this guess (starting from 0)
        entropyByGuess[candidate] = entropyByGuess.get(candidate,0) + calculateEntropy(tally)
    progress.update('Complete')
    return entropyByGuess

In [133]:
#first-turn entropy of every guessable word against the full answer list;
#this scores every guess against every answer, so expect this cell to be the slow one
d = generateEntropyDict(loadAnswers())

'Complete'

In [134]:
#the guess with the highest first-turn entropy (if several tie, max returns the first one it encounters)
max(d, key=d.get)

'soare'

In [135]:
#save the first-turn entropies to disk, one "word: entropy" line per guess
with open('first_turn_entropies.txt', 'w') as out:
    out.writelines(f'{word}: {score}\n' for word, score in d.items())