### Step-01: Import libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import nltk
import warnings
warnings.filterwarnings('ignore')

### Step-02: Create DataFrame of all 5-letter English words using NLTK

In [2]:
english_words = set(nltk.corpus.words.words())
english_words_df = pd.DataFrame(english_words,columns=['word'])
english_words_df['word'] = english_words_df['word'].str.lower()
english_words_df['word_length'] = english_words_df.apply(lambda x: len(x['word']),axis=1)
english_words_5_characters_df = english_words_df[english_words_df['word_length'] == 5]

In [3]:
print('5-letter English Words:',english_words_5_characters_df.shape[0])
english_words_5_characters_df.head()

5-letter English Words: 10230


Unnamed: 0,word,word_length
25,pouce,5
68,coroa,5
77,atmos,5
88,amita,5
95,zoeal,5


### Step-03: Break out each letter in each 5-letter word

In [4]:
english_words_5_characters_letters_df = (english_words_5_characters_df['word']
                                         .str
                                         .split('',expand=True)
                                         .iloc[:,1:6]
                                        )
english_words_5_characters_letters_df['word'] = english_words_5_characters_df['word']
english_words_5_characters_letters_df.columns = ['letter_1',
                                                 'letter_2',
                                                 'letter_3',
                                                 'letter_4',
                                                 'letter_5',
                                                 'word'
                                                ]

english_words_5_characters_letters_df.head()

Unnamed: 0,letter_1,letter_2,letter_3,letter_4,letter_5,word
25,p,o,u,c,e,pouce
68,c,o,r,o,a,coroa
77,a,t,m,o,s,atmos
88,a,m,i,t,a,amita
95,z,o,e,a,l,zoeal


### Step-04: Calculate frequency of each letter in each position

In [5]:
letter_1_counts = (pd.DataFrame(english_words_5_characters_letters_df['letter_1']
                                .value_counts()
                               )
                   .reset_index(names=['letter'])
                   .rename(columns={'letter_1':'count',
                                    'letter':'letter_1'
                                   }
                          )
                  )
english_words_5_characters_counts = english_words_5_characters_letters_df.shape[0]
letter_1_counts['letter_1_percent'] = (letter_1_counts
                              .apply(lambda x: x['count'] / english_words_5_characters_counts,
                                     axis=1
                                    )
                             )
letter_1_counts = letter_1_counts[['letter_1','letter_1_percent']]
letter_1_counts.head()

Unnamed: 0,letter_1,letter_1_percent
0,s,0.127664
1,a,0.078397
2,c,0.073314
3,b,0.071848
4,t,0.066569


In [6]:
letter_2_counts = (pd.DataFrame(english_words_5_characters_letters_df['letter_2']
                                .value_counts()
                               )
                   .reset_index(names=['letter'])
                   .rename(columns={'letter_2':'count',
                                    'letter':'letter_2'
                                   }
                          )
                  )

letter_2_counts['letter_2_percent'] = (letter_2_counts
                              .apply(lambda x: x['count'] / english_words_5_characters_counts,
                                     axis=1
                                    )
                             )
letter_2_counts = letter_2_counts[['letter_2','letter_2_percent']]
letter_2_counts.head()

Unnamed: 0,letter_2,letter_2_percent
0,a,0.17781
1,o,0.132063
2,e,0.120332
3,i,0.096188
4,u,0.086022


In [7]:
letter_3_counts = (pd.DataFrame(english_words_5_characters_letters_df['letter_3']
                                .value_counts()
                               )
                   .reset_index(names=['letter'])
                   .rename(columns={'letter_3':'count',
                                    'letter':'letter_3'
                                   }
                          )
                  )

letter_3_counts['letter_3_percent'] = (letter_3_counts
                              .apply(lambda x: x['count'] / english_words_5_characters_counts,
                                     axis=1
                                    )
                             )
letter_3_counts = letter_3_counts[['letter_3','letter_3_percent']]
letter_3_counts.head()

Unnamed: 0,letter_3,letter_3_percent
0,a,0.097752
1,r,0.096676
2,i,0.0826
3,n,0.07654
4,o,0.075758


In [8]:
letter_4_counts = (pd.DataFrame(english_words_5_characters_letters_df['letter_4']
                                .value_counts()
                               )
                   .reset_index(names=['letter'])
                   .rename(columns={'letter_4':'count',
                                    'letter':'letter_4'
                                   }
                          )
                  )

letter_4_counts['letter_4_percent'] = (letter_4_counts
                              .apply(lambda x: x['count'] / english_words_5_characters_counts,
                                     axis=1
                                    )
                             )
letter_4_counts = letter_4_counts[['letter_4','letter_4_percent']]
letter_4_counts.head()

Unnamed: 0,letter_4,letter_4_percent
0,e,0.132551
1,a,0.11261
2,i,0.093157
3,n,0.060508
4,r,0.057771


In [9]:
letter_5_counts = (pd.DataFrame(english_words_5_characters_letters_df['letter_5']
                                .value_counts()
                               )
                   .reset_index(names=['letter'])
                   .rename(columns={'letter_5':'count',
                                    'letter':'letter_5'
                                   }
                          )
                  )

letter_5_counts['letter_5_percent'] = (letter_5_counts
                              .apply(lambda x: x['count'] / english_words_5_characters_counts,
                                     axis=1
                                    )
                             )
letter_5_counts = letter_5_counts[['letter_5','letter_5_percent']]
letter_5_counts.head()

Unnamed: 0,letter_5,letter_5_percent
0,e,0.138514
1,y,0.133822
2,a,0.100684
3,t,0.071652
4,r,0.071163


### Step-05: Merge in letter position weights (percentages) with the 5-letter word DataFrame

In [10]:
english_words_5_characters_letters_df = pd.merge(left=english_words_5_characters_letters_df,
                                                 right=letter_1_counts,
                                                 on='letter_1',
                                                 how='left'
                                                )

english_words_5_characters_letters_df = pd.merge(left=english_words_5_characters_letters_df,
                                                 right=letter_2_counts,
                                                 on='letter_2',
                                                 how='left'
                                                )

english_words_5_characters_letters_df = pd.merge(left=english_words_5_characters_letters_df,
                                                 right=letter_3_counts,
                                                 on='letter_3',
                                                 how='left'
                                                )

english_words_5_characters_letters_df = pd.merge(left=english_words_5_characters_letters_df,
                                                 right=letter_4_counts,
                                                 on='letter_4',
                                                 how='left'
                                                )

english_words_5_characters_letters_df = pd.merge(left=english_words_5_characters_letters_df,
                                                 right=letter_5_counts,
                                                 on='letter_5',
                                                 how='left'
                                                )
english_words_5_characters_letters_df.head()

Unnamed: 0,letter_1,letter_2,letter_3,letter_4,letter_5,word,letter_1_percent,letter_2_percent,letter_3_percent,letter_4_percent,letter_5_percent
0,p,o,u,c,e,pouce,0.058651,0.132063,0.053177,0.04262,0.138514
1,c,o,r,o,a,coroa,0.073314,0.132063,0.096676,0.056891,0.100684
2,a,t,m,o,s,atmos,0.078397,0.023949,0.039883,0.056891,0.050929
3,a,m,i,t,a,amita,0.078397,0.014956,0.0826,0.05435,0.100684
4,z,o,e,a,l,zoeal,0.006452,0.132063,0.06305,0.11261,0.057576


### Step-06: Create word-level scores (weights) using letter weights and normalize 0-1
### Words with high scores have letters that occur frequently in that position

In [11]:
english_words_5_characters_letters_df['score'] = (english_words_5_characters_letters_df['letter_1_percent'] *
                                                  english_words_5_characters_letters_df['letter_2_percent'] *
                                                  english_words_5_characters_letters_df['letter_3_percent'] *
                                                  english_words_5_characters_letters_df['letter_4_percent'] *
                                                  english_words_5_characters_letters_df['letter_5_percent']
                                                 )
top_score = english_words_5_characters_letters_df['score'].max()
english_words_5_characters_letters_df['score'] = (english_words_5_characters_letters_df
                                                  .apply(lambda x:
                                                         x['score'] / top_score,
                                                         axis=1
                                                        )
                                                 )
english_words_5_characters_letters_df = (english_words_5_characters_letters_df
                                         .sort_values(by='score',ascending=False)
                                         .reset_index(drop=True)
                                        )
english_words_5_characters_letters_df.head()

Unnamed: 0,letter_1,letter_2,letter_3,letter_4,letter_5,word,letter_1_percent,letter_2_percent,letter_3_percent,letter_4_percent,letter_5_percent,score
0,s,o,r,e,e,soree,0.127664,0.132063,0.096676,0.132551,0.138514,1.0
1,s,a,l,a,y,salay,0.127664,0.17781,0.062561,0.11261,0.133822,0.715134
2,s,a,r,a,n,saran,0.127664,0.17781,0.096676,0.11261,0.070968,0.586053
3,c,o,r,e,e,coree,0.073314,0.132063,0.096676,0.132551,0.138514,0.574273
4,b,o,r,e,e,boree,0.071848,0.132063,0.096676,0.132551,0.138514,0.562787


### Step-07: Create WordleAgent class with associated functions
### This is the agent that generates guesses at each step when playing Wordle. It does this by selecting the word that has the highest score based on the given parameters in the guess_word function.

In [12]:
class WordleAgent():
    def __init(self):
        pass
    
    def guess_word(self,
                   words_df,
                   word_column,
                   previous_guesses=[],
                   not_in_word=[],
                   letter_1='',
                   letter_2='',
                   letter_3='',
                   letter_4='',
                   letter_5='',
                   letter_1_not=[],
                   letter_2_not=[],
                   letter_3_not=[],
                   letter_4_not=[],
                   letter_5_not=[],
                   contains_letters=[]
                  ):
            
        likely_words = words_df
        
        likely_words = likely_words[~likely_words[word_column].isin(previous_guesses)]
        
        if len(list(set(not_in_word))) > 0:
            for letter in list(set(not_in_word)):
                likely_words = likely_words[~likely_words[word_column].str.contains(letter)]
        else:
            pass
        
        if len(list(set(contains_letters))) > 0:
            for letter in list(set(contains_letters)):
                likely_words = likely_words[words_df[word_column].str.contains(letter)]
        else:
            pass
        
        # letter 1 equal to
        if letter_1 != '':
            likely_words = likely_words[likely_words['letter_1'] == letter_1]
        else:
            pass
        
        # letter 1 not equal to
        if len(letter_1_not) > 0:
            for letter in list(set(letter_1_not)):
                likely_words = likely_words[likely_words['letter_1'] != letter]
        else:
            pass
        
        # letter 2 equal to
        if letter_2 != '':
            likely_words = likely_words[likely_words['letter_2'] == letter_2]
        else:
            pass
        
        # letter 2 not equal to
        if len(letter_2_not) > 0:
            for letter in list(set(letter_2_not)):
                likely_words = likely_words[likely_words['letter_2'] != letter]
        else:
            pass
        
        # letter 3 equal to
        if letter_3 != '':
            likely_words = likely_words[likely_words['letter_3'] == letter_3]
        else:
            pass
        
        # letter 3 not equal to
        if len(letter_3_not) > 0:
            for letter in list(set(letter_3_not)):
                likely_words = likely_words[likely_words['letter_3'] != letter]
        else:
            pass
        
        # letter 4 equal to
        if letter_4 != '':
            likely_words = likely_words[likely_words['letter_4'] == letter_4]
        else:
            pass
        
        # letter 4 not equal to
        if len(letter_4_not) > 0:
            for letter in list(set(letter_4_not)):
                likely_words = likely_words[likely_words['letter_4'] != letter]
        else:
            pass
        
        # letter 5 equal to
        if letter_5 != '':
            likely_words = likely_words[likely_words['letter_5'] == letter_5]
        else:
            pass
        
        # letter 5 not equal to
        if len(letter_5_not) > 0:
            for letter in list(set(letter_5_not)):
                likely_words = likely_words[likely_words['letter_5'] != letter]
        else:
            pass
        
        try:
            guessed_word = likely_words[word_column].values[0]
        except:
            print('No words meet this/these criteria')
            
        print(f'You should guess {guessed_word}')
        
        return guessed_word

### Step-08: Create Wordle class with associated functions. The three functions are (1) generate_word, which generates a random 5-letter word; (2) assess_word, which compares the guessed word to the true word; and (3) play, which is used to play Wordle with iterative word suggestions from the above WordleAgent.

In [13]:
class Wordle():
    def __init__(self):
        pass
    
    def generate_word(self,words_df,column):
        word = words_df[column].sample(1).values[0]
        return word
    
    def assess_word(self,guessed_word,true_word):
        result = []
#        print(f'Guessed word: {guessed_word}')
#        print(f'True word: {true_word}')
        if guessed_word == true_word:
            result = ['G','G','G','G','G']
            print('Words are the same - you win')
        else:
            for i,j in zip(guessed_word,true_word):
                if i == j:
                    result.append('G')
                elif i in true_word:
                    result.append('Y')
                else:
                    result.append('-')
        return result
        
    def play(self):
        true_word = self.generate_word(english_words_5_characters_df,column='word')
        previous_guesses = []
        not_in_word=[]
        letter_1=''
        letter_2=''
        letter_3=''
        letter_4=''
        letter_5=''
        letter_1_not=[]
        letter_2_not=[]
        letter_3_not=[]
        letter_4_not=[]
        letter_5_not=[]
        contains_letters=[]
        for i in range(1,7):
            print(f'Attempt #{i}')
            if i == 1:
                first_guess = (WordleAgent()
                               .guess_word(words_df=english_words_5_characters_letters_df,
                                           word_column='word'
                                          )
                              )
            guessed_word = input('Make your guess: ')
            guessed_word
            previous_guesses.append(guessed_word)
            result = self.assess_word(guessed_word,true_word)
            print(result)
            if i == 6 and result != ['G','G','G','G','G']:
                print('Incorrect guess - you lose')
                print(f'The correct answer was {true_word}')
            elif result == ['G','G','G','G','G']:
                print('Correct - you win')
                break
            else:
                for a,b,c in zip(previous_guesses[-1],result,range(1,6)):
                    if c == 1:
                        if b == 'G':
                            letter_1 = a
                        elif b == 'Y':
                            contains_letters.append(a)
                            letter_1_not.append(a)
                        else:
                            not_in_word.append(a)
                    if c == 2:
                        if b == 'G':
                            letter_2 = a
                        elif b == 'Y':
                            contains_letters.append(a)
                            letter_2_not.append(a)
                        else:
                            not_in_word.append(a)
                    if c == 3:
                        if b == 'G':
                            letter_3 = a
                        elif b == 'Y':
                            contains_letters.append(a)
                            letter_3_not.append(a)
                        else:
                            not_in_word.append(a)
                    if c == 4:
                        if b == 'G':
                            letter_4 = a
                        elif b == 'Y':
                            contains_letters.append(a)
                            letter_4_not.append(a)
                        else:
                            not_in_word.append(a)
                    if c == 5:
                        if b == 'G':
                            letter_5 = a
                        elif b == 'Y':
                            contains_letters.append(a)
                            letter_5_not.append(a)
                        else:
                            not_in_word.append(a)
                
                next_guess = (WordleAgent()
                              .guess_word(words_df=english_words_5_characters_letters_df,
                                          word_column='word',
                                          previous_guesses=previous_guesses,
                                          not_in_word=not_in_word,
                                          contains_letters=contains_letters,
                                          letter_1=letter_1,
                                          letter_2=letter_2,
                                          letter_3=letter_3,
                                          letter_4=letter_4,
                                          letter_5=letter_5,
                                          letter_1_not=letter_1_not,
                                          letter_2_not=letter_2_not,
                                          letter_3_not=letter_3_not,
                                          letter_4_not=letter_4_not,
                                          letter_5_not=letter_5_not
                                         )
                             )

            print('')
        return previous_guesses,true_word

### Step-09: Play the Wordle game with the WordleAgent providing the word you should guess at each step.

In [14]:
wordle = Wordle()
prev,true = wordle.play()

Attempt #1
You should guess soree
Make your guess: soree
['-', '-', '-', '-', '-']
You should guess palay

Attempt #2
Make your guess: palay
['-', 'G', '-', 'Y', 'Y']
You should guess banya

Attempt #3
Make your guess: banya
['-', 'G', 'Y', 'Y', 'Y']
You should guess maynt

Attempt #4
Make your guess: maynt
['-', 'G', 'G', 'Y', '-']
You should guess zayin

Attempt #5
Make your guess: zayin
Words are the same - you win
['G', 'G', 'G', 'G', 'G']
Correct - you win
