Someone on Twitter asked me to check this word list

In [1]:
first_guess_words = [w.lower() for w in 'Arson, Cited, Lumpy'.split(', ')]
first_guess_words

['arson', 'cited', 'lumpy']

import utilities

In [2]:
import os

os.getcwd() + '/..'

'/Users/Daniel_Kats/prog/wordle/notebooks/..'

In [3]:
import sys
sys.path.append('..')

In [4]:
# read_parsed_words -> all valid 5-letter words
# read_parsed_answers -> all actual wordle answers

from parse_data import read_parsed_answers, read_parsed_words

## Cheating

Use just the list of known answers

In [5]:
import numpy as np

# recall - the possibilities table is an n x m matrix where each entry is a base 3 number smaller than 255
# the number of rows = number of guess words
# the number of cols = number of answer words
# so entry (i, j) is the result you get by guessing word i when the solution is answer word j
# in the asymmetric case, the first n words are the answer words in alphabetical order
# the remaining guess words are sorted in alphabetical order

table = np.load('../data-parsed/possibilities-table-asymmetric-base-3.npy')
table.shape

(12972, 2315)

In [6]:
answers = read_parsed_answers()
answers

Unnamed: 0,date,day,answer
0,2021-06-19,0,CIGAR
1,2021-06-20,1,REBUT
2,2021-06-21,2,SISSY
3,2021-06-22,3,HUMPH
4,2021-06-23,4,AWAKE
...,...,...,...
2310,2027-10-16,2310,JUDGE
2311,2027-10-17,2311,ROWER
2312,2027-10-18,2312,ARTSY
2313,2027-10-19,2313,RURAL


In [7]:
answer_words = list(answers.answer.str.lower().values)
answer_words_chrono = answer_words[:]

answer_words.sort()

len(answer_words)

2315

massage guess words to be in the right order

In [8]:
guess_words = read_parsed_words()
guess_words

guess_words_orig = guess_words[:]

# we need to recalibrate how these guess words are ordered
s_guess_words = set(guess_words)
s_answer_words = set(answer_words)
s_remaining_words = s_guess_words - s_answer_words
# len(s_remaining_words)

remaining_words = list(s_remaining_words)
guess_words = answer_words + remaining_words
len(guess_words)

assert len(guess_words) == len(guess_words_orig)
assert set(guess_words) == set(guess_words_orig)
assert set(guess_words[:len(answer_words)]) == set(answer_words)


In [53]:
from decision_tree import find_possible_answers, ALL_LETTERS_CORRECT
from possibilities_table import guess_response_to_string
from typing import List


def try_strategy(answer_word: str,
                answer_words: List[str],
                table: np.ndarray) -> List[int]:
    """Return a list of possible answers after applying the strategy
    Those answers are indexes into answer_words
    """
    
    j = answer_words.index(answer_word)
    guess_results = []
    guesses = []
    
    for gw in first_guess_words:
        i = guess_words.index(gw)
        guesses.append(i)
        guess_results.append(table[i, j])
        if guess_results[-1] == ALL_LETTERS_CORRECT:
            break
        
#     print(answer_word)
    
#     print('guess results:')
#     for g, gr in zip(guesses, guess_results):
#         grv = guess_response_to_string(gr)
#         print(f'{g} -> {grv}')
#     print(guess_results)
    
    pas = find_possible_answers(guesses, guess_results, table)
#     print(pas)
    
#     print('')
#     print('possible answers:')
#     for pa in pas:
#         print(guess_words[pa])
        
    return list(pas)
    
    
try_strategy('cigar', answer_words, table)

[352, 357, 390, 2247, 392, 619, 2197]

In [32]:
rows = []
d = {}

for i, answer_word in enumerate(answer_words_chrono):
    pas = try_strategy(answer_word)
    d[answer_word] = pas
    
    rows.append({
        'answer_day': i,
        'answer_word': answer_word,
        'num_possibilities': len(pas)
    })

In [33]:
import pandas as pd

df = pd.DataFrame(rows)
df

Unnamed: 0,answer_day,answer_word,num_possibilities
0,0,cigar,7
1,1,rebut,2
2,2,sissy,2
3,3,humph,1
4,4,awake,4
...,...,...,...
2310,2310,judge,7
2311,2311,rower,11
2312,2312,artsy,1
2313,2313,rural,1


In [46]:
df.iloc[df.num_possibilities.idxmax()]

answer_day              32
answer_word          helix
num_possibilities       17
Name: 32, dtype: object

In [47]:
df.num_possibilities.mean()

3.493304535637149

In [36]:
df.iloc[df.num_possibilities.idxmin()]

answer_day               3
answer_word          humph
num_possibilities        1
Name: 3, dtype: object

In [37]:
first_guess_words

['arson', 'cited', 'lumpy']

In [41]:
answer_words.index('lumpy')

1165

In [45]:
df[df['answer_word'] == 'humph']

Unnamed: 0,answer_day,answer_word,num_possibilities
3,3,humph,1


In [49]:
df[['num_possibilities']]

Unnamed: 0,num_possibilities
0,7
1,2
2,2
3,1
4,4
...,...
2310,7
2311,11
2312,1
2313,1


## All 5-letter Words

In [54]:
rows = []
d = {}

my_table = np.load('../data-parsed/possibilities-table-base-3.npy')

for i, answer_word in enumerate(guess_words):
    pas = try_strategy(
        answer_word,
        answer_words=guess_words,
        table=my_table
    )
    d[answer_word] = pas
    
    rows.append({
        'answer_day': i,
        'answer_word': answer_word,
        'num_possibilities': len(pas)
    })
    
master_df = pd.DataFrame(rows)
master_df

Unnamed: 0,answer_day,answer_word,num_possibilities
0,0,aback,1
1,1,abase,8
2,2,abate,2
3,3,abbey,2
4,4,abbot,2
...,...,...,...
12967,12967,jonty,41
12968,12968,parol,24
12969,12969,spoom,60
12970,12970,munts,20


In [56]:
master_df.iloc[master_df.num_possibilities.idxmax()]

answer_day            3573
answer_word          sapan
num_possibilities      169
Name: 3573, dtype: object