## First, import pandas (to load the data into a DataFrame) and the Enum class.

In [1]:
import pandas as pd

from enum import Enum

## Define the validation values that describe whether a guessed letter is found in the word of the day.

- exactly: the letter is found in the word and in the correct spot.
- exist: the letter is in the word but in the wrong spot.
- nonexistent: the letter is not found in the word.


In [2]:
class Val(Enum):
    """Feedback the game gives for one letter of a guess."""

    # The letter is in the word and sits in the correct spot.
    exactly = "exactly"
    # The letter is in the word, but in a different spot.
    exist = "exist"
    # The letter is not found in the word.
    nonexistent = "nonexistent"

## The function to filter out the words that do not match the pattern of existing letters.

In [3]:
def filter_words(top_words, valid_letters, guess):
    """Keep only the candidate words that are consistent with one guess.

    Parameters
    ----------
    top_words : pandas.DataFrame
        Candidate words, one per row. Column ``i`` (by position) must hold
        the letter at position ``i`` of the word, because positions are
        looked up with ``iloc[:, i]``.
    valid_letters : list
        Letters already known to be in the answer. It is updated in place
        with every letter this guess marks ``Val.exactly`` or ``Val.exist``
        so the knowledge carries over to later calls.
    guess : iterable of (letter, Val) pairs
        The guessed letters in order, each paired with its feedback.

    Returns
    -------
    pandas.DataFrame
        The rows of ``top_words`` that still match. ``top_words`` itself is
        not modified.
    """
    guess = list(guess)

    # Pass 1: learn every confirmed letter before filtering anything. A guess
    # can contain the same letter twice, with one copy marked nonexistent and
    # a *later* copy marked exactly/exist; judging the first copy before the
    # second is recorded would discard every word containing that letter,
    # including the real answer.
    for letter, validation in guess:
        if validation in (Val.exactly, Val.exist) and letter not in valid_letters:
            valid_letters.append(letter)

    # Pass 2: apply the per-position constraints.
    temp_words = top_words
    for index, (letter, validation) in enumerate(guess):
        if validation == Val.exactly:
            temp_words = temp_words[temp_words.iloc[:, index] == letter]

        elif validation == Val.exist:
            contains_letter = temp_words.isin([letter]).any(axis=1)
            wrong_spot = temp_words.iloc[:, index] != letter
            temp_words = temp_words[contains_letter & wrong_spot]

        elif validation == Val.nonexistent:
            if letter in valid_letters:
                # The letter is in the answer (confirmed elsewhere), so this
                # grey copy only rules out this particular position.
                temp_words = temp_words[temp_words.iloc[:, index] != letter]
            else:
                temp_words = temp_words[~temp_words.isin([letter]).any(axis=1)]

    return temp_words

## Load the words.csv file into a Pandas Dataframe.

Use words.csv for words sorted by how frequently they are used, or words2.csv for words sorted (optimized) by how frequently their letters are used.

In [4]:
# Word list to load: "words.csv" is sorted by word frequency, "words2.csv" by
# letter frequency. Change this one name to switch between them.
WORDS_FILE = "words2.csv"

# The file's `rank` column becomes the index. filter_words addresses the
# remaining columns by position, so the letter columns have to come first.
words = pd.read_csv(WORDS_FILE, index_col='rank')
words.head(10)

Unnamed: 0_level_0,1,2,3,4,5,word
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,s,e,r,i,a,seria
2,a,i,r,e,s,aires
3,r,a,i,s,e,raise
4,a,r,i,s,e,arise
5,a,r,i,e,s,aries
6,s,i,e,n,a,siena
7,a,i,s,n,e,aisne
8,a,n,i,s,e,anise
9,a,r,n,i,e,arnie
10,e,i,n,a,r,einar


## Use the three most frequently used letters to suggest a list of words to start the game. This only works with words.csv.

In [5]:
most_frequent_letters = 'aei'

# Build one boolean mask over the whole list: a row is kept only when every
# one of the frequent letters appears somewhere in it.
has_every_letter = pd.Series(True, index=words.index)
for letter in most_frequent_letters:
    has_every_letter &= words.isin([letter]).any(axis=1)

starting_words = words[has_every_letter]
starting_words[['word']].head(10)

Unnamed: 0_level_0,word
rank,Unnamed: 1_level_1
1,seria
2,aires
3,raise
4,arise
5,aries
6,siena
7,aisne
8,anise
9,arnie
10,einar


## Make a copy of the words DataFrame, or reset it.

In [6]:
# Start (or restart) a game: no letters are confirmed yet, and the candidate
# list is a fresh copy so the loaded `words` frame itself is never narrowed.
valid_letters = list()
top_words = words.copy()

## Filtering out the top words.

1. Replace the "word" string variable with the word you entered into the game.
2. In the `validations` list, set the enum value at each index to match the result for the letter at that index of the word you entered.
3. Run cell
4. Pick a word from the list to use for your next guess.

In [7]:
word = 'aired'
validations = [Val.exist, Val.nonexistent, Val.exist, Val.nonexistent, Val.exactly]

# zip() stops at the shorter input, so a missing or extra validation would
# silently drop feedback; fail loudly instead.
if len(word) != len(validations):
    raise ValueError(
        "Expected one validation per letter: '{}' has {} letters but {} validations were given.".format(
            word, len(word), len(validations)
        )
    )

guess = list(zip(word.lower(), validations))
# Each run narrows `top_words` further and adds to `valid_letters`, so this
# cell is meant to be run once per guess; reset both before a new game.
top_words = filter_words(top_words, valid_letters, guess)
top_words[['word']]

Unnamed: 0_level_0,word
rank,Unnamed: 1_level_1
1277,roald
2045,hoard
2273,board
2274,broad
2623,shard
3335,brand
3501,grand
3612,guard
4270,fraud
4345,chard
