In [1]:
from collections import Counter

import pandas as pd

## Wordle cheats

1. Load the wordle words
1. Reduce to subset of words that meet current info. This is more valuable than step 3 even.
    - Excluding letters: `words[~words["whole_word"].str.contains('(a|b|c)')]` will drop words with a, b, c
    - Require letters: `words[words["whole_word"].str.contains('a')]` without the `~` will require a 
    - Not at this position: `words[words["pos2"] == "a"]` drop words with a in the second spot    
    - Exact position: `words[words["pos1"] == "a"]` will require a in the first spot
2. Use the "cheater" function.

**Combining info syntax:** 
```python
subset = words[
    (
        (<cond1>)
        & (<cond2>)
        ...
        & (<condN>)
    )
]
```

Example:
```python
subset = words[
    (
        (~words["whole_word"].str.contains('(a|i|s)'))
        & (words["whole_word"].str.contains('r'))
        & (words["pos5"] == "e")
    )
]
```

## Load some funcs 

In [9]:
def any_hits_criteria(word, df):
    myregex = "(" + "|".join(word) + ")"
    cov1 = df["whole_word"].str.count(myregex) >= 1
    cov2 = df["whole_word"].str.count(myregex) >= 2
    cov3 = df["whole_word"].str.count(myregex) >= 3
    cov4 = df["whole_word"].str.count(myregex) >= 4
    cov5 = df["whole_word"].str.count(myregex) >= 5
    out = pd.Series(
        [cov1.mean(), cov2.mean(), cov3.mean(), cov5.mean(), cov5.mean()],
        index=["cov1+", "cov2+", "cov3+", "cov4+", "cov5+"],
    )
    return out


def use_any_hits(df, sortcol=4):
    """
    sortcol : int, 1-5
    """
    return df.merge(
        df["whole_word"].apply(any_hits_criteria, df=df),
        left_index=True,
        right_index=True,
    ).sort_values("cov" + str(sortcol) + "+", ascending=False)

def dumb_exact(word, df):
    """
    how many words in the master list does this word
    have 1 exact position match for? 2? 3? 4? 5?
    """
    newindex=[0,1,2,3,4,5]
    
    out = (
        pd.concat(
            [(df["whole_word"].str[i] == c).astype(int) for i, c in enumerate(word)],
            axis=1,
        )
        .sum(axis=1)
        .value_counts()
        .reindex(index=newindex,fill_value=0)[1:]
        .fillna(0)
    )
#     print(out)
    out.index = ['exact1+','exact2+','exact3+','exact4+','exact5+']

#     out = pd.Series(
#         [cov1.mean(), cov2.mean(), cov3.mean(), cov5.mean(), cov5.mean()],
#         index=["cov1+", "cov2+", "cov3+", "cov4+", "cov5+"],
#     )
    return out

def use_exact(df, sortcol=4):
    """
    sortcol : int, 1-5
    """
    return df.merge(
        df["whole_word"].apply(dumb_exact, df=df),
        left_index=True,
        right_index=True,
    ).sort_values("exact" + str(sortcol) + "+", ascending=False)

def cheater(df):
    return (
        pd
        .merge(use_exact(df, 3), use_any_hits(df, 3), on="whole_word")
        .filter(regex="(whole|exact|cov)")
        .drop(labels=['exact1+','exact2+','cov1+','cov2+'],axis=1)
    )    

## Time to cheat

In [3]:
# step 1 - load the data
words = pd.read_csv("wordle.csv", names=["whole_word"])

# and create columns holding each letter in the given position
words[["pos1", "pos2", "pos3", "pos4", "pos5"]] = words["whole_word"].str.split(
    "", expand=True
)[[1, 2, 3, 4, 5]]

In [17]:
words.query("['a','b','c'] not in whole_word")

Unnamed: 0,whole_word,pos1,pos2,pos3,pos4,pos5
0,cigar,c,i,g,a,r
1,rebut,r,e,b,u,t
2,sissy,s,i,s,s,y
3,humph,h,u,m,p,h
4,awake,a,w,a,k,e
...,...,...,...,...,...,...
2310,judge,j,u,d,g,e
2311,rower,r,o,w,e,r
2312,artsy,a,r,t,s,y
2313,rural,r,u,r,a,l


In [36]:
# step 2 - reduce to subset
subset = words[
    (
        words.whole_word.str.contains("u")
        & words.whole_word.str.contains("e")
        & (words["pos3"] != "u")
        & (words["pos5"] != "e")
    )
]
subset

Unnamed: 0,whole_word,pos1,pos2,pos3,pos4,pos5
1,rebut,r,e,b,u,t
16,quiet,q,u,i,e,t
36,unfed,u,n,f,e,d
52,unmet,u,n,m,e,t
154,surer,s,u,r,e,r
169,usher,u,s,h,e,r
196,rebus,r,e,b,u,s
205,query,q,u,e,r,y
237,ulcer,u,l,c,e,r
332,fetus,f,e,t,u,s


In [37]:
# step 3 - use the funcs, then sort and explore to pick next word
cheater(subset)

Unnamed: 0,whole_word,exact3+,exact4+,exact5+,cov3+,cov4+,cov5+
0,queer,12,1,1,0.644444,0.022222,0.022222
1,under,9,1,1,0.822222,0.088889,0.088889
2,buyer,9,0,1,0.666667,0.022222,0.022222
3,outer,9,0,1,0.8,0.044444,0.044444
4,ruder,9,1,1,0.711111,0.044444,0.044444
5,tuber,8,0,1,0.866667,0.066667,0.066667
6,super,8,1,1,0.755556,0.088889,0.088889
7,purer,7,1,1,0.622222,0.044444,0.044444
8,upper,7,0,1,0.622222,0.044444,0.044444
9,ruler,7,1,1,0.622222,0.022222,0.022222
