In [47]:
'''GAME PLAN

for each word in guesses:
  add up the frequencies of each letter from letters_freq
  call it total_freq
end with a dict with each word having a total_freq
word in guesses with the highest total_freq is the best one


when clue comes in:
1 - narrow down guesses
2 - find the one with the most frequent letters (same method as above)
3 - repeat
'''

'GAME PLAN\n\nfor each word in guesses:\n  add up the frequencies of each letter from letters_freq\n  call it total_freq\nend with a dict with each word having a total_freq\nword in guesses with the highest total_freq is the best one\n\n\nwhen clue comes in:\n1 - narrow down guesses\n2 - find the one with the most frequent letters (same method as above)\n3 - repeat\n'

In [48]:
import numpy as np
import pandas as pd
from time import sleep

gPath = 'wordle_data/wordle-allowed-guesses.txt'
aPath = 'wordle_data/wordle-answers-alphabetical.txt'

# read files
# `with` closes each handle as soon as its text has been read.
with open(gPath, 'r') as gFile:
    guesses = gFile.read()
with open(aPath, 'r') as aFile:
    answers = aFile.read()

# every answer is also a legal guess, so fold the answers into the guess pool
guesses += '\n' + answers

# split() (no separator) drops the empty entry a trailing newline would otherwise
# create, and copes with '\r\n' line endings as well
guesses, answers = np.array(guesses.split()), np.array(answers.split())
guesses, answers = np.sort(guesses), np.sort(answers)

In [49]:
# log letter frequencies (old method)

# all answer words glued together into one long string of letters
letters = ''.join(answers)

# count every distinct letter once, ordered from most to least frequent
letters_freq = dict(sorted(
    ((letter, letters.count(letter)) for letter in set(letters)),
    key=lambda pair: pair[1],
    reverse=True,
))
print(letters_freq)


# get top 5 most frequent letters
top5 = {letter: letters_freq[letter] for letter in list(letters_freq)[:5]}
print(top5)

{'e': 1233, 'a': 979, 'r': 899, 'o': 754, 't': 729, 'l': 719, 'i': 671, 's': 669, 'n': 575, 'c': 477, 'u': 467, 'y': 425, 'd': 393, 'h': 389, 'p': 367, 'm': 316, 'g': 311, 'b': 281, 'f': 230, 'k': 210, 'w': 195, 'v': 153, 'z': 40, 'x': 37, 'q': 29, 'j': 27}
{'e': 1233, 'a': 979, 'r': 899, 'o': 754, 't': 729}


In [50]:
# log letter frequencies (new method)

# convert each word to a list of letters
letters = [list(word) for word in answers]

# make a list of length 5: entry i is every answer's i-th letter joined into one string
lettersCopy = [''.join(word[i] for word in letters) for i in range(5)]

# get count of unique characters specific to each position
lettersFreq = []
for column in lettersCopy:
    # dict.fromkeys() visits each distinct letter once, in first-seen order, so every
    # letter is counted a single time while ties keep the order they had before
    counts = {char: column.count(char) for char in dict.fromkeys(column)}
    # letters that never occur at this position still need an entry (frequency 0)
    for char in 'abcdefghijklmnopqrstuvwxyz':
        counts.setdefault(char, 0)
    # sort by most frequent
    lettersFreq.append(dict(sorted(counts.items(), key=lambda item: item[1], reverse=True)))

for freq in lettersFreq:
    print(freq)

{'s': 366, 'c': 198, 'b': 173, 't': 149, 'p': 142, 'a': 141, 'f': 136, 'g': 115, 'd': 111, 'm': 107, 'r': 105, 'l': 88, 'w': 83, 'e': 72, 'h': 69, 'v': 43, 'o': 41, 'n': 37, 'i': 34, 'u': 33, 'q': 23, 'j': 20, 'k': 20, 'y': 6, 'z': 3, 'x': 0}
{'a': 304, 'o': 279, 'r': 267, 'e': 242, 'i': 202, 'l': 201, 'u': 186, 'h': 144, 'n': 87, 't': 77, 'p': 61, 'w': 44, 'c': 40, 'm': 38, 'y': 23, 'd': 20, 'b': 16, 's': 16, 'v': 15, 'x': 14, 'g': 12, 'k': 10, 'f': 8, 'q': 5, 'z': 2, 'j': 2}
{'a': 307, 'i': 266, 'o': 244, 'e': 177, 'u': 165, 'r': 163, 'n': 139, 'l': 112, 't': 111, 's': 80, 'd': 75, 'g': 67, 'm': 61, 'p': 58, 'b': 57, 'c': 56, 'v': 49, 'y': 29, 'w': 26, 'f': 25, 'k': 12, 'x': 12, 'z': 11, 'h': 9, 'j': 3, 'q': 1}
{'e': 318, 'n': 182, 's': 171, 'a': 163, 'l': 162, 'i': 158, 'c': 152, 'r': 152, 't': 139, 'o': 132, 'u': 82, 'g': 76, 'd': 69, 'm': 68, 'k': 55, 'p': 50, 'v': 46, 'f': 35, 'h': 28, 'w': 25, 'b': 24, 'z': 20, 'x': 3, 'y': 3, 'j': 2, 'q': 0}
{'e': 424, 'y': 364, 't': 253, 'r': 

In [51]:
def generateClue(guess, answer):
  '''
  returns the list of 5 colour codes the game would show for `guess` when the
  hidden word is `answer`
  PRECONDITIONS:
  - guess: 5-letter string in guesses
  - answer: 5-letter string in answers
  0 = GREY   (letter does not occur in answer)
  1 = YELLOW (letter occurs in answer, but not at this position)
  2 = GREEN  (letter occurs in answer at this position)

  nuance: every copy of a repeated letter in guess is coloured independently, so
  a surplus copy shows as yellow where the real game would show grey
  '''
  GREY, YELLOW, GREEN = 0, 1, 2

  def colourAt(position):
    letter = guess[position]
    if letter not in answer:
      return GREY
    return GREEN if letter == answer[position] else YELLOW

  return [colourAt(position) for position in range(5)]

In [52]:
def narrowGuesses(clues, guess, currentGuesses):
  '''
  returns a narrowed array of possible guesses based on clues given for a guess
  PRECONDITIONS:
  - clues: list length 5 of only 0, 1, or 2
  - guess: 5-letter string in guesses (not necessarily in currentGuesses)
  - currentGuesses: np array of 5-letter strings, sublist of guesses

  A word is kept when it is not the guess itself and, for every position i:
  - GREEN  (2): the word has guess[i] at position i
  - YELLOW (1): the word contains guess[i], but not at position i
  - GREY   (0): the word does not contain guess[i] at all, UNLESS the same letter
    is yellow/green elsewhere in the guess (real Wordle feedback for a repeated
    letter). In that case the word must not have the letter at position i and
    must contain it exactly as many times as it was coloured.

  generateClue() never mixes grey with yellow/green for one letter, so the
  repeated-letter rule only matters for clues typed in from the real game.
  Yellow/green tiles do not enforce a minimum letter count, because
  generateClue() colours surplus copies of a letter yellow.

  All candidates are tested in one pass and selected with a boolean mask,
  instead of rebuilding the array with np.delete for each rejected word.
  The relative order of the surviving words is preserved.
  '''
  currentGuesses = np.asarray(currentGuesses)

  # number of yellow/green tiles each letter of the guess received
  coloured = {}
  for letter, clue in zip(guess, clues):
    if clue != 0:
      coloured[letter] = coloured.get(letter, 0) + 1

  def fits(word):
    # the guess itself has just been played, so it is never a candidate again
    if word == guess:
      return False
    for i in range(5):
      letter, clue = guess[i], clues[i]
      if clue == 2: # green
        if word[i] != letter:
          return False
      elif clue == 1: # yellow
        if word[i] == letter or letter not in word:
          return False
      elif clue == 0: # grey
        hits = coloured.get(letter, 0)
        if hits == 0:
          # letter is absent from the answer altogether
          if letter in word:
            return False
        elif word[i] == letter or word.count(letter) != hits:
          # surplus copy of a letter that is present: the count is now known exactly
          return False
    return True

  keep = np.array([fits(word) for word in currentGuesses], dtype=bool)
  return currentGuesses[keep]

In [53]:
def bestGuess(currentGuesses, duplicatePenalty=1, method=2):
  '''
  returns the best guess (one with the most frequent letters) based on the given options,
  and the list of all options ordered from best to worst
  PRECONDITIONS:
  - currentGuesses: non-empty np array of 5-letter strings
  - duplicatePenalty: weight applied to every letter's frequency; the weight is divided
    by how many times the letter occurs in the word, so repeated letters earn less
  - method: 1 = overall letter frequency (letters_freq),
            2 = frequency of the letter at its position (lettersFreq)
  RAISES:
  - ValueError if method is not 1 or 2, or if currentGuesses is empty
  '''
  if method not in (1, 2):
    raise ValueError('method must be 1 or 2, got ' + repr(method))
  if len(currentGuesses) == 0:
    raise ValueError('currentGuesses is empty, there is no word to pick')

  freqDict = {}
  for word in currentGuesses:
    totalFreq = 0
    for i in range(5):
      letter = word[i]
      table = letters_freq if method == 1 else lettersFreq[i]
      # a letter missing from the table counts as frequency 0
      totalFreq += table.get(letter, 0) * (duplicatePenalty/word.count(letter))
    freqDict[word] = totalFreq

  # sort by highest value; the sort is stable, so ties keep their original order
  bestWords = sorted(freqDict, key=freqDict.get, reverse=True)
  best = bestWords[0]
  return best, bestWords

In [54]:
def runWordle(numTimes=1, display=True, firstWord=None, method=2):
  '''
  runs the game a given number of times (default=1)
  PARAMETERS:
  - numTimes: number of games to play, each against a random word from answers
  - display: when True, print the progress of every game (with short pauses)
  - firstWord: opening guess for every game; None (default) opens with the best
    guess bestGuess() finds over all of guesses
  - method: scoring method handed to bestGuess() (1 = overall, 2 = per position)
  returns: np array (float) of # tries it takes each time (for data analysis purposes)
  '''
  triesLog = []

  for _ in range(numTimes):
    # game loop

    # setup
    answer = np.random.choice(answers) # choose random answer
    clues = []
    currentGuesses = guesses
    tries = 0

    if display: print('Chosen answer: ', answer)
    while not clues == [2, 2, 2, 2, 2]:
      # method must go by keyword: the second positional slot of bestGuess is duplicatePenalty
      guess, bestWords = bestGuess(currentGuesses, method=method)
      if tries == 0 and firstWord is not None:
        # play the requested opener, so the clue and the narrowing use the same word
        guess = firstWord
      if display:
        print('Best guesses: ', bestWords[:20])
        print('Current guess: ', guess)
        sleep(0.5)
      clues = generateClue(guess, answer)
      if display:
        print('Current clues: ', clues)
        sleep(0.5)
      currentGuesses = narrowGuesses(clues, guess, currentGuesses)
      tries += 1

    triesLog.append(tries)
    if display:
      print('Successfully guessed the right word in', tries, 'tries ', end='')
      if tries <= 6: print(':)')
      else: print(':(')
      print()
      sleep(1)

  # float dtype matches what repeated np.append onto np.array([]) used to produce
  return np.array(triesLog, dtype=float)

In [55]:
def analyze(array):
  '''
  prints summary statistics (count, mean, std, min, quartiles, max) for the given
  tries log, as computed by pandas' DataFrame.describe()
  - array: array-like of numbers, e.g. the value returned by runWordle()
  '''
  triesFrame = pd.DataFrame(array)
  print('Analysis of performance:')
  summary = triesFrame.describe()
  print(summary)

In [None]:
def solve(currentGuesses=guesses, method=2):
    '''
    interactive helper for a real game: suggests a word, reads back the clues the
    game showed for it, narrows the candidates and suggests the next word
    PARAMETERS:
    - currentGuesses: np array of 5-letter candidate words to start from
    - method: scoring method handed to bestGuess() (1 = overall, 2 = per position)
    INPUT:
    - first guess: the word that was played; leave empty to let the solver choose
    - clues: 5 digits, one per letter (0 = grey, 1 = yellow, 2 = green), or 'quit'
    '''
    guess = input('Enter first guess:\n').strip().lower()
    if guess == '':
        # method must go by keyword: the second positional slot of bestGuess is duplicatePenalty
        guess = bestGuess(currentGuesses, method=method)[0]
        sleep(1)
        print('First guess: ' + guess)
    while True:
        userIn = input('Enter clues').strip()
        if userIn == 'quit':
            break
        # ask again instead of crashing on a typo
        if len(userIn) != 5 or any(digit not in '012' for digit in userIn):
            print('Clues must be exactly 5 digits, each 0 (grey), 1 (yellow) or 2 (green)')
            continue
        clues = [int(x) for x in userIn]
        if clues == [2,2,2,2,2]:
            print('Correct word has been found')
            break
        currentGuesses = narrowGuesses(clues, guess, currentGuesses)
        if len(currentGuesses) == 0:
            # contradictory clues leave nothing to rank
            print('No candidate words are left - check the clues that were entered')
            break
        guess, bestWords = bestGuess(currentGuesses, method=method)
        print('Best currentGuesses:', bestWords[:5])
        print('Best Guess:', guess)
        if len(bestWords) == 1:
            print('Correct word has been found')
            break
        sleep(0.5)


In [65]:
triesLog = runWordle(3, display=True, method=2)
analyze(triesLog)

Chosen answer:  rehab
Best guesses:  ['saine', 'soare', 'saice', 'slane', 'slate', 'soily', 'soave', 'samey', 'sauce', 'slice', 'shale', 'savey', 'saute', 'share', 'souce', 'shine', 'suite', 'crane', 'seity', 'slaty']
Current guess:  saine
Current clues:  [0, 1, 0, 0, 1]
Best guesses:  ['beaty', 'teary', 'blaer', 'peaty', 'mealy', 'deary', 'meaty', 'beady', 'beamy', 'leary', 'weary', 'beaky', 'vealy', 'peaky', 'beray', 'peavy', 'peart', 'hoaed', 'feart', 'ready']
Current guess:  beaty
Current clues:  [1, 2, 1, 0, 0]
Best guesses:  ['debar', 'kebar', 'rebar', 'debag', 'melba', 'jelab', 'rehab', 'zebra', 'hejab', 'kebab']
Current guess:  debar
Current clues:  [0, 2, 1, 2, 1]
Best guesses:  ['rehab']
Current guess:  rehab
Current clues:  [2, 2, 2, 2, 2]
Successfully guessed the right word in 4 tries :)

Chosen answer:  angel
Best guesses:  ['saine', 'soare', 'saice', 'slane', 'slate', 'soily', 'soave', 'samey', 'sauce', 'slice', 'shale', 'savey', 'saute', 'share', 'souce', 'shine', 'suite

In [64]:
solve()

First guess: saine
Best currentGuesses: ['prase', 'blase', 'arose', 'chase', 'urase']
Best Guess: prase
Best currentGuesses: ['blase', 'chase', 'lyase', 'cease', 'tease']
Best Guess: blase
Best currentGuesses: ['chase', 'cease', 'tease', 'fease', 'ukase']
Best Guess: chase
Best currentGuesses: ['tease', 'fease', 'ukase', 'mease']
Best Guess: tease
Correct word has been found


In [None]:
'''NEW IDEA:

So the way I score the words right now is the total frequency of each letter across all the answers.
Now this works well, but it doesn't take into account the PLACEMENT of the letters. (current average: 4.6 tries)
My solution is to have a separate freq_dict for each letter position
So that a would have the highest score in the position that it most frequently appears.
I don't think this would be too hard to implement.
'''
'''
Observations: program spends most time in narrowGuesses(), more specifically running np.delete(). 
proposal: finding a more efficient alternative?
Also, there are words that have the same letters (therefore same total_freq). So maybe we could find a way to reward letter placement as well.

NEW IDEA:
Although the program picks its first word using the letters that occur most frequently in the list of answers,
oater is not necessarily the best because it does not account for the positions of the letters.
To find the ACTUAL best word, I should run tests.
1. Get a list of possible best first words (don't know from where I will get this. If it was efficient I would run tests on all possible words)
2. Decide on a good number of times to run wordle with each first word to get a stable average # of tries
3. In theory, the first word with the lowest average # of tries SHOULD BE THE BEST ONE.
'''
'''
Analysis of performance (old method):
count  500.000000
mean     4.630000
std      1.448204
min      2.000000
25%      4.000000
50%      4.000000
75%      5.000000
max     10.000000

NEW IDEA (again):
Use the first two guesses to get the best possible clues. Then use the rest to solve the wordle.
'''
