In [1]:
# import libraries
import pandas as pd
import re
#from english_words import english_words_set
from wordfreq import zipf_frequency # count the usage frequency of words

In [2]:
# wordle word pool obtained from: https://gist.github.com/cfreshman

# before nyt bought wordle
guesses_org = pd.read_csv('wordle-allowed-guesses.txt', header=None)
answers_org = pd.read_csv('wordle-answers-alphabetical.txt', header=None)

# after nyt bought wordle
guesses_nyt = pd.read_csv('wordle-nyt-allowed-guesses.txt', header=None)
answers_nyt = pd.read_csv('wordle-nyt-answers-alphabetical.txt', header=None)

In [3]:
# combine the allowed guesses and answers to create the complete allowed word list
word_pool_org = guesses_org[0].to_list() + answers_org[0].to_list()
word_pool_nyt = guesses_nyt[0].to_list() + answers_nyt[0].to_list()

In [4]:
print(len(word_pool_org))
print(len(word_pool_nyt))

12972
12947


In [5]:
# count the usage frequency of words in word_list using the wordfreq library
# more information about the library: https://pypi.org/project/wordfreq/
def create_usage_freq_d(word_list):    
    usage_freq_d = {}
    for word in word_list:
        usage_freq_d[word] = zipf_frequency(word, 'en')
        
    # sort the dict by the usage frequency score in desc order
    sorted_usage_freq_d = {k: v for k, v in reversed(sorted(usage_freq_d.items(), key=lambda item: item[1]))}
    return sorted_usage_freq_d

In [6]:
# this function is not used
def create_letter_score_d(word_list):
    letters_d = {}
    for letter in ''.join(word_list):
        if letter in letters_d:
            letters_d[letter] += 1
        else:
            letters_d[letter] = 1
        
    freq_d = {key: 0 for key in word_list}
    for word in word_list:
        unique_letter_word = "".join(set(word))
        for n in unique_letter_word:
            freq_d[word] += letters_d[n]
        
    sorted_freq_d = {k: v for k, v in reversed(sorted(freq_d.items(), key=lambda item: item[1]))}

In [7]:
# function to update the word list based on the green/yellow/black feedback after each guess
# function returns an updated and usage frequency sorted word list
def update_pool(pool, guess, pattern):
    pattern_dict = {}
    for n in range(5):
        if list(guess)[n] not in pattern_dict:
            pattern_dict[list(guess)[n]] = list(pattern)[n]
        else:
            pattern_dict[list(guess)[n]+'1'] = list(pattern)[n]

    b_list = []
    for k, v in pattern_dict.items():
        if v =='b':
            b_list.append(k[0])
            
    g_list = []
    for k, v in pattern_dict.items():
        if v =='g':
            g_list.append(k[0])
            
    b_list = [letter for letter in b_list if letter not in g_list]
            
    #print('Black List:')
    #print(b_list)
    #print("")
    
    if len(b_list) != 0:
        for b_letter in b_list:
            pool = [word for word in pool if b_letter not in word]
            
    #print('After Black:')
    #print(pool)
    #print("")
            
    g_pattern = ''
    for k, v in pattern_dict.items():
        if v =='g':
            g_pattern+=k[0]
        else:
            g_pattern+='.'
    
    p = re.compile(g_pattern)
    pool = [word for word in pool if p.match(word)]
    
    #print("After Green:")
    #print(pool)
    #print("")
    
    count = 0
    y_dict = {}
    for k, v in pattern_dict.items():
        if v =='y':
            y_dict[k[0]] = count
        count+=1
    
    g_list = []
    for k, v in pattern_dict.items():
        if v =='g':
            g_list.append(k[0])

    remove_list = []
    for k, v in y_dict.items():
        for word in pool:
            if (word.find(k[0]) == v):
                remove_list.append(word)
    
    pool = [word for word in pool if word not in remove_list]
    
    y_list = list(y_dict.keys())
    if len(y_list) != 0:
        for y_letter in y_list:
            pool = [word for word in pool if y_letter in word]
    
    # select the pairs in updated word list from the word usage frequency dictionary
    freq_d = {word_key: usage_freq_d[word_key] for word_key in pool}
    sorted_freq_d = {k: v for k, v in reversed(sorted(freq_d.items(), key=lambda item: item[1]))}
    return sorted_freq_d
    #return list(sorted_freq_d.keys())

In [8]:
usage_freq_d = create_usage_freq_d(word_pool_nyt)

In [9]:
# check the word usage frequency dictionary
list(usage_freq_d.items())[:10]

[('about', 6.4),
 ('their', 6.33),
 ('there', 6.31),
 ('which', 6.3),
 ('would', 6.27),
 ('other', 6.16),
 ('first', 6.11),
 ('after', 6.11),
 ('think', 6.08),
 ('could', 6.06)]

In [10]:
sample_answer = "frame"

In [11]:
#testing the function
new_pool = update_pool(word_pool_nyt, 'roate', 'ybgbg')
new_pool

{'share': 5.2,
 'grade': 4.8,
 'aware': 4.78,
 'frame': 4.58,
 'grace': 4.56,
 'brave': 4.33,
 'grave': 4.31,
 'spare': 4.28,
 'scare': 4.07,
 'drake': 3.96,
 'brake': 3.95,
 'crane': 3.92,
 'brace': 3.72,
 'grape': 3.7,
 'erase': 3.66,
 'flare': 3.64,
 'crave': 3.47,
 'glare': 3.36,
 'craze': 3.35,
 'snare': 3.25,
 'graze': 3.02,
 'drape': 2.74,
 'blare': 2.39,
 'brane': 2.08,
 'whare': 2.02,
 'crake': 1.96,
 'beare': 1.93,
 'braze': 1.84,
 'sware': 1.83,
 'crape': 1.83,
 'phare': 1.75,
 'quare': 1.71,
 'deare': 1.61,
 'brame': 1.54,
 'heare': 1.42,
 'chare': 1.38,
 'feare': 1.36,
 'crame': 1.33,
 'meare': 1.15,
 'drave': 1.11,
 'prase': 1.01,
 'urase': 0.0,
 'urare': 0.0,
 'seare': 0.0,
 'peare': 0.0,
 'maare': 0.0,
 'leare': 0.0,
 'irade': 0.0,
 'grame': 0.0,
 'geare': 0.0,
 'frape': 0.0,
 'crare': 0.0,
 'arame': 0.0}

In [12]:
new_pool_1 = update_pool(new_pool, 'share', 'bbgyg')
new_pool_1

{'grade': 4.8,
 'frame': 4.58,
 'grace': 4.56,
 'brave': 4.33,
 'grave': 4.31,
 'drake': 3.96,
 'brake': 3.95,
 'crane': 3.92,
 'brace': 3.72,
 'grape': 3.7,
 'crave': 3.47,
 'craze': 3.35,
 'graze': 3.02,
 'drape': 2.74,
 'brane': 2.08,
 'crake': 1.96,
 'braze': 1.84,
 'crape': 1.83,
 'brame': 1.54,
 'crame': 1.33,
 'drave': 1.11,
 'arame': 0.0,
 'crare': 0.0,
 'frape': 0.0,
 'grame': 0.0,
 'irade': 0.0,
 'urare': 0.0}

In [13]:
new_pool_2 = update_pool(new_pool_1, 'grade', 'bggbg')
new_pool_2

{'frame': 4.58,
 'brave': 4.33,
 'brake': 3.95,
 'crane': 3.92,
 'brace': 3.72,
 'crave': 3.47,
 'craze': 3.35,
 'brane': 2.08,
 'crake': 1.96,
 'braze': 1.84,
 'crape': 1.83,
 'brame': 1.54,
 'crame': 1.33,
 'urare': 0.0,
 'frape': 0.0,
 'crare': 0.0,
 'arame': 0.0}

In [14]:
new_pool_3 = update_pool(new_pool_1, 'frame', 'ggggg')
new_pool_3

{'frame': 4.58}