In [1]:
from collections import Counter

import pandas as pd

## Wordle cheats

1. Load the wordle words
1. Use `shrinky()` to reduce to subset of words that meet current info. This is more valuable than step 3 even.
2. Use the `cheater()` function to rank the possible guesses.

In [2]:
# step 1 - load the data
url = "https://raw.githubusercontent.com/donbowen/wordle/main/wordle.csv"
words = pd.read_csv(url, names=["whole_word"])

# and create columns holding each letter in the given position
words[["pos1", "pos2", "pos3", "pos4", "pos5"]] = words["whole_word"].str.split(
    "", expand=True
)[[1, 2, 3, 4, 5]]

## Load some funcs 

In [3]:
def any_hits_criteria(word, df):
    myregex = "(" + "|".join(word) + ")"
    cov1 = df["whole_word"].str.count(myregex) >= 1
    cov2 = df["whole_word"].str.count(myregex) >= 2
    cov3 = df["whole_word"].str.count(myregex) >= 3
    cov4 = df["whole_word"].str.count(myregex) >= 4
    cov5 = df["whole_word"].str.count(myregex) >= 5
    out = pd.Series(
        [cov1.mean(), cov2.mean(), cov3.mean(), cov5.mean(), cov5.mean()],
        index=["cov1+", "cov2+", "cov3+", "cov4+", "cov5+"],
    )
    return out


def use_any_hits(df, sortcol=4):
    """
    sortcol : int, 1-5
    """
    return df.merge(
        df["whole_word"].apply(any_hits_criteria, df=df),
        left_index=True,
        right_index=True,
    ).sort_values("cov" + str(sortcol) + "+", ascending=False)

def dumb_exact(word, df):
    """
    how many words in the master list does this word
    have 1 exact position match for? 2? 3? 4? 5?
    """
    newindex=[0,1,2,3,4,5]
    
    out = (
        pd.concat(
            [(df["whole_word"].str[i] == c).astype(int) for i, c in enumerate(word)],
            axis=1,
        )
        .sum(axis=1)
        .value_counts()
        .reindex(index=newindex,fill_value=0)[1:]
        .fillna(0)
    )
#     print(out)
    out.index = ['exact1+','exact2+','exact3+','exact4+','exact5+']

#     out = pd.Series(
#         [cov1.mean(), cov2.mean(), cov3.mean(), cov5.mean(), cov5.mean()],
#         index=["cov1+", "cov2+", "cov3+", "cov4+", "cov5+"],
#     )
    return out

def use_exact(df, sortcol=4):
    """
    sortcol : int, 1-5
    """
    return df.merge(
        df["whole_word"].apply(dumb_exact, df=df),
        left_index=True,
        right_index=True,
    ).sort_values("exact" + str(sortcol) + "+", ascending=False)

def cheater(df):
    return (
        pd
        .merge(use_exact(df, 3), use_any_hits(df, 3), on="whole_word")
        .filter(regex="(whole|exact|cov)")
        .drop(labels=['exact1+','exact2+','cov1+','cov2+'],axis=1)
    )    

def shrinky(exact='-----',include='',exclude='',df=words):
    '''
    exact = String with 5 hyphens. replace the corresponding one with 
            a letter when you know it's in that spot (green).
            Example: exact = '--a--' 
    include = String with letters in yellow
              Example: include = 'tr' will require words to have t and r
    exclude = String with letters in below
              Example: include = 'tr' will EXCLUDE words with t or r
    '''
    if len(exclude) > 0:
        exclude = '('+''.join([c + '|' for c in exclude[:-1]])+exclude[-1]+')'
        df = df[(~df["whole_word"].str.contains(exclude))]

    if len(include) > 0:
        for c in include:
            df = df[df["whole_word"].str.contains(c)]

    for i,c in enumerate(exact):
        if c != '-':
            df = df[df["whole_word"].str[i] == c]
    
    return(df)

## Time to cheat

In [4]:
# step 2 - reduce to subset
subset = shrinky('--a--','r')

In [5]:
# step 3 - use the funcs, then sort and explore to pick next word
cheater(subset)

Unnamed: 0,whole_word,exact3+,exact4+,exact5+,cov3+,cov4+,cov5+
0,crane,29,4,1,0.658333,0.008333,0.008333
1,brace,28,4,1,0.616667,0.008333,0.008333
2,erase,26,0,1,0.558333,0.008333,0.008333
3,frame,26,0,1,0.516667,0.016667,0.016667
4,grace,26,7,1,0.625000,0.008333,0.008333
...,...,...,...,...,...,...,...
115,reach,3,2,1,0.616667,0.008333,0.008333
116,realm,3,1,1,0.508333,0.025000,0.025000
117,roach,2,1,1,0.425000,0.008333,0.008333
118,roast,2,0,1,0.500000,0.016667,0.016667
