In [28]:

# import pandas as pd
import pandas as pd 
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" 
import random
import matplotlib.pyplot as plt
from collections import Counter

# **WORDLE Algorithm Solver**

There is a set amount of 6 tries. Each guess must be a valid 5-letter word. If a letter is in the word and in the correct spot it will be GREEN; if it is in the word but in the wrong spot it will be YELLOW; if it is not in the word at all it will be GRAY.

There are initially 12,972 possible words that could be the word of the day. The solver always uses "aeros" as its first guess because of the high probability of that word yielding yellows or greens. From there the solver uses the clues to narrow down the remaining candidate words and keeps guessing until it arrives at the correct word!


In [2]:
# Colab-only step: mount Google Drive at /content/drive so that files stored
# under that path can be opened by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Read the word list into a list of raw lines; each entry still carries its
# trailing newline at this point.
with open('/content/drive/My Drive/Colab Notebooks/Data Science/Wordle_words.csv') as f:
    five_words = f.readlines()

In [29]:
### DF to work with
# strip the trailing "\n" from every line; blank lines are skipped because
# they would otherwise become an empty "word" with no letters
converted_list = [line.strip() for line in five_words if line.strip()]

# split every word into its individual letters
final_lst = [list(word) for word in converted_list]

# one row per word: a column per letter position plus the word itself
letters_df = pd.DataFrame(final_lst, columns=['first', 'second', 'third', 'fourth', 'fifth'])
letters_df['word'] = converted_list
letters_df.head(1)


# count how often each letter occurs across the whole word list
all_words = "".join(converted_list)
res = Counter(all_words)

df_res = pd.DataFrame.from_dict(res, orient='index').reset_index()
df_res = df_res.rename(columns={'index': 'letter', 0: 'count'})

# rank 1 = most frequent letter; the ranks are sized from the data instead of
# assuming exactly 26 distinct characters, which raised a length mismatch
# whenever the word list used a different alphabet size
sorted_df = df_res.sort_values(["count"], ascending=False)
sorted_df['rank'] = np.arange(1, len(sorted_df) + 1)
sorted_df.head(1)

# global variables
df_test = letters_df.copy()

Unnamed: 0,first,second,third,fourth,fifth,word
0,a,a,h,e,d,aahed


Unnamed: 0,letter,count,rank
12,s,6665,1


# **User Created Functions**

In [30]:
# reads in a dataframe of words and outputs the word with the highest frequency of unique letters
def most_frequent(df, n_uq, letter_ranks=None):
  """Pick the candidate word made of the most common letters.

  Every word is scored by summing the frequency rank of the letters in its
  five position columns (rank 1 = most common letter), so a LOWER score is
  better. Only words with at least ``n_uq`` distinct letters are eligible;
  if no word qualifies, the threshold is lowered one step at a time until at
  least one word does.

  Parameters
  ----------
  df : pandas.DataFrame
      Candidate words with the columns 'first', 'second', 'third', 'fourth',
      'fifth' and 'word'. The frame is left untouched (no helper columns are
      written into it).
  n_uq : int
      Preferred minimum number of distinct letters in the chosen word.
  letter_ranks : pandas.DataFrame, optional
      Table with a 'letter' and a 'rank' column. Defaults to the
      notebook-level ``sorted_df``.

  Returns
  -------
  str
      The first word, in ``df`` order, with the lowest score among the
      eligible words.

  Raises
  ------
  ValueError
      If ``df`` has no rows (the threshold could otherwise be lowered forever).
  KeyError
      If a word contains a letter that is missing from ``letter_ranks``.
  """
  if len(df) == 0:
    raise ValueError('most_frequent() needs at least one candidate word')

  if letter_ranks is None:
    letter_ranks = sorted_df

  # One dictionary lookup per letter instead of filtering the rank table and
  # calling int() on the resulting one-element Series.
  rank_of = dict(zip(letter_ranks['letter'], letter_ranks['rank']))

  position_cols = ['first', 'second', 'third', 'fourth', 'fifth']
  letter_rows = zip(*(df[col] for col in position_cols))
  scores = [sum(rank_of[letter] for letter in letters) for letters in letter_rows]
  distinct = [len(set(word)) for word in df['word']]

  # Work on a private table so the caller's frame (often a filtered slice)
  # is never written to.
  ranked = pd.DataFrame({
      'word': list(df['word']),
      'score': scores,
      'nunique': distinct,
  })

  eligible = ranked[ranked['nunique'] >= n_uq]
  while len(eligible) == 0:
    # Nothing has that many distinct letters: relax the requirement.
    n_uq = n_uq - 1
    eligible = ranked[ranked['nunique'] >= n_uq]

  # idxmin returns the first row holding the minimum, which keeps the
  # "first word in df order wins a tie" behaviour.
  return eligible.loc[eligible['score'].idxmin(), 'word']

In [31]:
def first_guess(df, n_uq):
  """Play the fixed opening word and report whether it was the answer.

  The opening word is appended to the notebook-level ``guessed_words`` list
  and compared with the notebook-level ``word_of_day``. ``df`` and ``n_uq``
  are accepted but not used by this function.

  Returns 1 when the opening word is the word of the day, otherwise 0.
  """
  opener = 'aeros'
  guessed_words.append(opener)

  if opener != word_of_day:
    print('Guess ', len(guessed_words), ':', opener, ' \n  Wrong! Try again.')
    return 0

  print('Congrats you guessed the word of the day! \n The word was: ', word_of_day)
  return 1

In [32]:
# labels the grey, yellow, greens:
def clue(df, guess, answer=None):
  """Filter the candidate words using the feedback from the latest guess.

  The latest guess (``guess[-1]``) is compared with the answer letter by
  letter and three kinds of constraints are applied to ``df``:

  * green  - the guessed letter is correct in that position, so only
    candidates with the same letter in that position are kept;
  * not green - the guessed letter is wrong in that position, so candidates
    carrying that letter in that position are dropped;
  * letter presence - candidates must contain every guessed letter that
    occurs in the answer and must not contain any guessed letter that does
    not occur in it.

  Every constraint is true of the answer itself, so the answer can never be
  filtered out, and a wrong guess is always removed from the candidates.
  (Previously a repeated letter could leave a green position flagged as
  yellow, which removed the real answer, and a surplus copy of an already
  green letter produced no constraint at all.)

  Parameters
  ----------
  df : pandas.DataFrame
      Candidate words with the columns 'first', 'second', 'third', 'fourth',
      'fifth' and 'word'. It is not modified; a filtered frame is returned.
  guess : list of str
      Guess history; only the last entry is evaluated.
  answer : str, optional
      The word being searched for. Defaults to the word stored in the
      notebook-level ``final_word`` frame.

  Returns
  -------
  pandas.DataFrame
      The rows of ``df`` that are still consistent with the feedback.

  Raises
  ------
  ValueError
      If the guess or the answer is not exactly five letters long.
  """
  positions = ['first', 'second', 'third', 'fourth', 'fifth']

  guess_word = guess[-1]
  if answer is None:
    answer = final_word['word'].iloc[0]

  if len(guess_word) != len(positions) or len(answer) != len(positions):
    raise ValueError('guess and answer must both be 5 letters long')

  # Position constraints: keep the letter where it is green, rule it out
  # for that position everywhere else (yellow, grey or surplus duplicate).
  for column, guessed, actual in zip(positions, guess_word, answer):
    if guessed == actual:
      df = df[df[column] == actual]
    else:
      df = df[df[column] != guessed]

  # Presence constraints, once per distinct guessed letter.
  for letter in dict.fromkeys(guess_word):
    has_letter = df['word'].str.contains(letter, regex=False)
    if letter in answer:
      df = df[has_letter]
    else:
      df = df[~has_letter]

  return df

In [33]:
# new guesses
def guessing(df, n_uq):
  """Make the next guess from the remaining candidates and report the result.

  The guess is whatever ``most_frequent(df, n_uq)`` returns. It is appended
  to the notebook-level ``guessed_words`` list and compared with the
  notebook-level ``word_of_day``.

  Returns 1 when the guess is the word of the day, otherwise 0.
  """
  pick = most_frequent(df, n_uq)
  guessed_words.append(pick)

  if pick != word_of_day:
    print('Guess ', len(guessed_words), ':', pick, ' \n  Wrong! Try again.')
    return 0

  print('Congrats you guessed the word of the day! \n The word was: ', word_of_day)
  return 1

In [34]:
#########################
# Main Function Solver
#########################
def main_solver(df, max_guesses=None):
  """Run the solver loop until the word of the day is guessed.

  The first turn plays the fixed opening word via ``first_guess``; every
  later turn asks ``guessing`` for the best remaining candidate. After each
  turn the candidates are narrowed with ``clue`` and the number of remaining
  words is printed. Guesses are recorded in the notebook-level
  ``guessed_words`` list.

  Parameters
  ----------
  df : pandas.DataFrame
      Starting set of candidate words.
  max_guesses : int, optional
      Stop after this many guesses without a win. ``None`` (the default)
      keeps guessing for as long as candidates remain.

  Returns
  -------
  None
      Progress and the final result are printed.
  """
  winner = 0
  while winner == 0:
    if max_guesses is not None and len(guessed_words) >= max_guesses:
      print("Wordle Solver did not find the word of the day within ", len(guessed_words), " moves.")
      return None

    if len(guessed_words) < 1:
      winner = first_guess(df, 5)
    else:
      # user created functions
      winner = guessing(df, 5)

    df = clue(df, guessed_words)
    print("There are", len(df), " possible words")

    # With no candidates left there is nothing to guess from; stop instead
    # of handing an empty frame to the next turn.
    if winner == 0 and len(df) == 0:
      print("Wordle Solver ran out of possible words after ", len(guessed_words), " moves.")
      return None

  print("Congrats! Wordle Solver guessed the word of the day in ", len(guessed_words), " moves :D")
  return None

In [None]:
# Fresh game: clear the guess history, set the target word and look up its row.
word_of_day = 'sugar'
guessed_words = []
final_word = letters_df.loc[letters_df['word'].eq(word_of_day)]

main_solver(df_test)

Guess  1 : aeros  
  Wrong! Try again.
There are 53  possible words
Guess  2 : sitar  
  Wrong! Try again.
There are 2  possible words
Congrats you guessed the word of the day! 
 The word was:  sugar
There are 1  possible words
Congrats! Wordle Solver guessed the word of the day in  3  moves :D


In [None]:
# Fresh game: clear the guess history, set the target word and look up its row.
word_of_day = 'scamp'
guessed_words = []
final_word = letters_df.loc[letters_df['word'].eq(word_of_day)]

main_solver(df_test)

Guess  1 : aeros  
  Wrong! Try again.
Guess  2 : slain  
  Wrong! Try again.
Guess  3 : scaud  
  Wrong! Try again.
Guess  4 : scath  
  Wrong! Try again.
Congrats you guessed the word of the day! 
 The word was:  scamp
Congrats! Wordle Solver guessed the word of the day in  5  moves :D


In [None]:
# Fresh game: clear the guess history, set the target word and look up its row.
word_of_day = 'pests'
guessed_words = []
final_word = letters_df.loc[letters_df['word'].eq(word_of_day)]

main_solver(df_test)

Guess  1 : aeros  
  Wrong! Try again.
There are 310  possible words
Guess  2 : teils  
  Wrong! Try again.
There are 53  possible words
Guess  3 : dents  
  Wrong! Try again.
There are 24  possible words
Guess  4 : hefts  
  Wrong! Try again.
There are 19  possible words
Guess  5 : yests  
  Wrong! Try again.
There are 8  possible words
Congrats you guessed the word of the day! 
 The word was:  pests
There are 1  possible words
Congrats! Wordle Solver guessed the word of the day in  6  moves :D


In [None]:
# Fresh game: clear the guess history, set the target word and look up its row.
word_of_day = 'tests'
guessed_words = []
final_word = letters_df.loc[letters_df['word'].eq(word_of_day)]

main_solver(df_test)

Guess  1 : aeros  
  Wrong! Try again.
There are 310  possible words
Guess  2 : teils  
  Wrong! Try again.
There are 16  possible words
Guess  3 : tends  
  Wrong! Try again.
There are 12  possible words
Guess  4 : tegus  
  Wrong! Try again.
There are 10  possible words
Guess  5 : techs  
  Wrong! Try again.
There are 8  possible words
Guess  6 : temps  
  Wrong! Try again.
There are 5  possible words
Guess  7 : texes  
  Wrong! Try again.
There are 3  possible words
Guess  8 : teffs  
  Wrong! Try again.
There are 2  possible words
Congrats you guessed the word of the day! 
 The word was:  tests
There are 1  possible words
Congrats! Wordle Solver guessed the word of the day in  9  moves :D


In [35]:
# Fresh game: clear the guess history, set the target word and look up its row.
word_of_day = 'dodge'
guessed_words = []
final_word = letters_df.loc[letters_df['word'].eq(word_of_day)]

main_solver(df_test)

Guess  1 : aeros  
  Wrong! Try again.
There are 403  possible words
Guess  2 : toile  
  Wrong! Try again.
There are 47  possible words
Guess  3 : coude  
  Wrong! Try again.
There are 7  possible words
Guess  4 : podge  
  Wrong! Try again.
There are 4  possible words
Guess  5 : modge  
  Wrong! Try again.
There are 3  possible words
Guess  6 : bodge  
  Wrong! Try again.
There are 2  possible words
Guess  7 : wodge  
  Wrong! Try again.
There are 1  possible words
Congrats you guessed the word of the day! 
 The word was:  dodge
There are 1  possible words
Congrats! Wordle Solver guessed the word of the day in  8  moves :D
