In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import string
# Show up to 50 rows of a DataFrame, and keep showing 50 rows when a longer
# frame gets truncated. Both options are spelled with their full
# `display.` prefix: pandas resolves shortened names by pattern matching,
# which can stop working if a similarly named option is added later.
pd.set_option('display.max_rows', 50)
pd.set_option('display.min_rows', 50)


### Wordle guess recommender: takes the guesses you have played so far and recommends the best word to play next

In [6]:
def data_setup(words_path='words_alpha.txt',
               common_path='google-10000-english-usa.txt',
               naughty_path='naughty_words.txt'):
    """Build the ranked table of five-letter candidate words.

    Each input file is read with ``pd.read_csv`` and must have a ``word``
    column header.

    Parameters
    ----------
    words_path : str
        File with every dictionary word, one per row.
    common_path : str
        File with common words, ordered from most to least common; the row
        position is used as the word's commonness rank.
    naughty_path : str
        File with words that must never be recommended.

    Returns
    -------
    pandas.DataFrame
        One row per five-letter word that is not in the naughty list, with:

        * ``word``, ``word_length``, ``_merge``
        * ``pos_1`` .. ``pos_5`` - the letter at each position
        * ``contains_<letter>`` - 1 if the word contains the letter, else 0
        * ``value_letter_<letter>`` - share of candidate words containing the
          letter if this word contains it, else 0
        * ``word_value`` - sum of the ``value_letter_*`` columns
        * ``word_value_ranking`` - rank of ``word_value`` (1 = highest)
        * ``common_ranking`` - the commonness rank, or the string
          ``'Uncommon'`` for words missing from the common-word list
        * ``final_ranking`` - mean of ``word_value_ranking`` and the
          commonness rank, where uncommon words count as rank
          ``<number of uncommon words>``
    """
    # Load the three word lists.
    words = pd.read_csv(words_path)
    word_common_ranking = pd.read_csv(common_path)
    naughty_word = pd.read_csv(naughty_path)

    # The common-word list is already ordered, so the row index is the rank.
    word_common_ranking['common_ranking'] = word_common_ranking.index + 1
    words = pd.merge(words, word_common_ranking, how='left', on='word')

    # Left anti join against the naughty words: keep only rows that found no
    # match. `.copy()` gives an independent frame so the column assignments
    # below do not write into a slice of the merged frame.
    words = pd.merge(words, naughty_word, how='left', on='word', indicator=True)
    words = words[words['_merge'] == 'left_only'].copy()

    # Keep only the five-letter words.
    words['word_length'] = words['word'].str.len()
    words = words.loc[words['word_length'] == 5].copy()

    # One column per position holding the letter found there.
    for i in range(1, 6):
        words['pos_' + str(i)] = words['word'].str.slice(start=(i - 1), stop=i)

    # For every letter: flag the words containing it and measure how common
    # the letter is (words containing it / all candidate words). A word's
    # value is the sum of the commonness of the distinct letters it contains.
    total_words = len(words)
    column_names = []
    for letter in string.ascii_lowercase:
        has_letter = words['word'].str.contains(letter, regex=False)
        words['contains_' + letter] = has_letter.astype(int)
        # Guard the division so an empty candidate table does not raise.
        perc_letter = has_letter.sum() / total_words if total_words else 0.0
        words['value_letter_' + letter] = np.where(has_letter, perc_letter, 0)
        column_names.append('value_letter_' + letter)

    words['word_value'] = words[column_names].sum(axis=1)

    # Rank the words by value (1 = most valuable).
    words['word_value_ranking'] = words['word_value'].rank(method='max', ascending=False)

    # Remember which words are uncommon BEFORE filling in their placeholder
    # rank; comparing against the placeholder afterwards would also relabel a
    # genuinely common word that happens to have that exact rank.
    is_uncommon = words['common_ranking'].isna()
    count_nas = int(is_uncommon.sum())
    words['common_ranking'] = words['common_ranking'].fillna(count_nas)

    # Final score: average of the two rankings (lower is better).
    words['final_ranking'] = words[['word_value_ranking', 'common_ranking']].mean(axis=1)

    # For display, label the uncommon words; real ranks stay numeric.
    words['common_ranking'] = words['common_ranking'].astype(object).mask(is_uncommon, 'Uncommon')

    return words



### Play the game. Play the first round normally. From the second round on, enter which letters of your previous guess came back green (right letter, right position) and which came back yellow (in the word, but in a different position), then enter your next guess.

In [11]:
# Interactive game loop: after every guess the player reports the green and
# yellow letters, the candidate table is narrowed down, and the 50 best
# remaining words are displayed.
words = data_setup()
numbers_list = ['0','1','2','3','4','5','6','7','8','9']


def _parse_letters(raw):
    """Turn feedback such as 'i,e' or 'I, E' into ['i', 'e'] (letters only, lower-cased)."""
    return [ch for ch in raw.lower() if ch.isalpha()]


def _apply_feedback(candidates, guess_list, list_green, list_yellow):
    """Return the rows of `candidates` still consistent with the feedback on one guess."""
    # dict.fromkeys keeps each distinct letter once, in guess order.
    for letter in dict.fromkeys(guess_list):
        # Skip digits and anything else that has no `contains_` column
        # (spaces, punctuation) instead of failing with a KeyError.
        if letter in numbers_list or 'contains_' + letter not in candidates.columns:
            continue

        # Every position at which the guess used this letter; a letter that
        # is repeated in the guess has several.
        pos_cols = ['pos_' + str(i + 1) for i, ch in enumerate(guess_list) if ch == letter]
        pos_cols = [col for col in pos_cols if col in candidates.columns]

        green_count = list_green.count(letter)
        if green_count == 0 and letter not in list_yellow:
            # Grey: the letter is not in the answer at all.
            candidates = candidates.loc[candidates['contains_' + letter] == 0]
            continue

        # Green or yellow: the answer contains the letter.
        candidates = candidates.loc[candidates['contains_' + letter] == 1]
        if not pos_cols:
            continue

        at_guessed_spot = candidates[pos_cols] == letter
        if green_count >= len(pos_cols):
            # Every occurrence in the guess was green.
            candidates = candidates.loc[at_guessed_spot.all(axis=1)]
        elif green_count > 0:
            # Only some occurrences were green and the feedback format does
            # not say which, so only require a match at one of the spots.
            candidates = candidates.loc[at_guessed_spot.any(axis=1)]
        else:
            # Yellow only: the letter sits at none of the guessed spots.
            candidates = candidates.loc[~at_guessed_spot.any(axis=1)]
    return candidates


# NOTE: the sixth guess is entered at the end of the last iteration, so no
# feedback is ever requested for it.
for round_num in range(1, 7):
    print('Guess number: '+str(round_num))
    if round_num > 1:
        green = input('Your green letters are-write in format letter,letter such as i,e: ')
        yellow = input('Your yellow letters are-write in format letter,letter such as i,e: ')
        list_green = _parse_letters(green)
        list_yellow = _parse_letters(yellow)

        # Five green letters means the previous guess was the answer.
        if len(list_green)==5:
            print('Congrats! You Solved the puzzle in '+ str(round_num -1) + ' tries!')
            break

        words = _apply_feedback(words, guess_list, list_green, list_yellow)
        display(words[['word', 'final_ranking', 'word_value', 'common_ranking']].sort_values(['final_ranking'], ascending=[True]).head(50))

    guess = input('Your guess is: ')
    # Normalise case so the letters line up with the lower-case word table.
    guess_list = list(guess.strip().lower())



Guess number: 1
Your guess is: fuzzy
Guess number: 2
Your green letters are-write in format letter,letter such as i,e: 
Your yellow letters are-write in format letter,letter such as i,e: u


Unnamed: 0,word,final_ranking,word_value,common_ranking
50045,cause,868.5,1.61325,1223.0
1170,abuse,1492.5,1.572229,2225.0
144388,house,1557.5,1.424185,297.0
355953,usage,1913.0,1.568013,3031.0
356844,value,2039.5,1.382471,410.0
274952,route,2328.5,1.464578,2532.0
338808,under,2480.5,1.334655,190.0
194283,mouse,2517.5,1.433056,2382.0
326694,tours,2620.0,1.410532,2133.0
756,about,2632.5,1.313515,36.0


Your guess is: cause
Guess number: 3
Your green letters are-write in format letter,letter such as i,e: u,e
Your yellow letters are-write in format letter,letter such as i,e: 


Unnamed: 0,word,final_ranking,word_value,common_ranking
274952,route,2328.5,1.464578,2532.0
274755,rouge,6749.0,1.339122,8822.0
156978,inure,8321.5,1.468164,Uncommon
175451,louie,8367.0,1.462187,Uncommon
194188,moule,9931.0,1.311187,Uncommon
254256,prune,10020.0,1.303322,Uncommon
238466,piute,10023.0,1.30307,Uncommon
41735,brule,10064.0,1.300302,Uncommon
175483,loupe,10102.0,1.297345,Uncommon
244885,poule,10102.0,1.297345,Uncommon


Your guess is: route
Guess number: 4
Your green letters are-write in format letter,letter such as i,e: o,u,e
Your yellow letters are-write in format letter,letter such as i,e: 


Unnamed: 0,word,final_ranking,word_value,common_ranking
175451,louie,8367.0,1.462187,Uncommon
194188,moule,9931.0,1.311187,Uncommon
175483,loupe,10102.0,1.297345,Uncommon
244885,poule,10102.0,1.297345,Uncommon
38927,boule,10252.0,1.284258,Uncommon
161869,joule,11522.0,1.186108,Uncommon
38894,bouge,11916.0,1.154838,Uncommon
144521,houve,12503.5,1.109224,Uncommon
361456,vouge,12750.0,1.086951,Uncommon
128306,gouge,13306.5,1.033283,Uncommon


Your guess is: gouge
Guess number: 5
Your green letters are-write in format letter,letter such as i,e: g,o,u,g,e
Your yellow letters are-write in format letter,letter such as i,e: 
Congrats! You Solved the puzzle in 4 tries!


In [14]:
# NOTE(review): looks like a leftover debugging cell - it prints the current
# working directory, and the saved output shows an absolute local path.
# Consider deleting the cell and clearing its output before sharing.
pwd

'C:\\Users\\dbuch\\Documents'