https://www.wired.com/story/best-wordle-tips/

In [None]:
# Load the Kedro IPython extension.
# NOTE(review): presumably this is what injects the `catalog` object used further down — confirm.
%load_ext kedro.ipython

In [None]:
# Line magic from the Kedro IPython extension.
# NOTE(review): presumably refreshes the Kedro session/catalog variables — confirm against the Kedro docs.
%reload_kedro

In [3]:
import sys
# Extend the module search path so project code can be imported from this notebook.
# The path is relative, so it depends on the notebook's working directory.
# NOTE(review): the notebook imports `projectwordle.utils`, which normally requires the
# *parent* of the package directory (e.g. "../../src") on sys.path — confirm this entry is intended.
sys.path.append("../../src/projectwordle")

In [4]:
import polars as pl
import numpy as np
from IPython.display import HTML
from projectwordle.utils import (
    color_pattern_matching,
    difficulty_distribution,
    plot_most_difficult_words,
)

# Render up to 48 rows when displaying Polars tables for the rest of the session.
# The `set_*` classmethod is the documented way to change an option globally
# (the `Config(...)` constructor is intended for `with` blocks); the trailing
# semicolon keeps the returned Config object out of the cell output.
pl.Config.set_tbl_rows(48);

[1m<[0m[1;95mpolars.config.Config[0m[39m object at [0m[1;36m0x15f2d3400[0m[1m>[0m

# Load Data

In [None]:
# Load both input tables from the data catalog by dataset name.
five_letter_word_anagrams, simulating_openers = (
    catalog.load(dataset_name)
    for dataset_name in ("five_letter_words_anagrams", "simulating_openers")
)

In [6]:
# Keep the simulation columns of interest, then look up each row's *next* guess in the
# anagram table. It is a left join, so rows whose next guess has no anagram entry are
# kept with nulls. The right-hand `anagram_num` clashes with the left-hand one and is
# therefore surfaced by Polars under a suffixed name.
simulating_tries_difficulty = simulating_openers.select(
    [
        "challenge",
        "guess",
        "letter_differences",
        "common_letters",
        "num_diff_letters",
        "num_common_letters",
        "num_matching_index",
        "match_pattern",
        "num_choices_after_guess",
        "possible_guesses",
        "challenge_in_possible_guesses",
        "next_guess",
        "group",
        "tries",
        "difficulty",
        "guess_word_anagrams",
        "anagram_num",
    ]
).join(
    five_letter_word_anagrams.select(["words", "anagrams", "anagram_num"]),
    how="left",
    left_on="next_guess",
    right_on="words",
    coalesce=True,
)

In [7]:
# Preview the first six rows of the raw simulation table.
simulating_openers.head(n=6)

index,challenge,guess,match_pattern,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num
u32,str,str,str,str,str,u8,u8,u8,u16,str,bool,str,u32,u8,str,str,u8
0,"""yummy""","""saint""","""BBBBB""","""nisat""","""""",5,0,0,1222,"""fully, chord, power, offer, er…",False,"""loure""",0,5,"""hard""","""saint, stain, satin, antis, na…",5
1,"""yummy""","""loure""","""BBYBB""","""nisaetolr""","""u""",9,1,0,56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2
2,"""yummy""","""dummy""","""BGGGG""","""nidsaetolr""","""umy""",10,3,3,3,"""mummy, gummy, yummy""",True,"""mummy""",0,5,"""hard""","""dummy""",1
3,"""yummy""","""mummy""","""YGGGG""","""nidsaetolr""","""umy""",10,3,3,2,"""yummy, gummy""",True,"""yummy""",0,5,"""hard""","""mummy""",1
4,"""yummy""","""yummy""","""GGGGG""","""""","""umy""",0,3,3,1,"""gummy""",False,"""yummy""",0,5,"""hard""","""yummy""",1
5,"""yummy""","""yummy""","""GGGGG""","""""","""umy""",0,3,3,1,"""gummy""",False,"""yummy""",0,5,"""hard""","""yummy""",1


In [8]:
# Preview the joined table (first five rows).
simulating_tries_difficulty.head(n=5)

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""yummy""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""fully, chord, power, offer, er…",False,"""loure""",0,5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""yummy""","""loure""","""nisaetolr""","""u""",9,1,0,"""BBYBB""",56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2,"""dummy""",1
"""yummy""","""dummy""","""nidsaetolr""","""umy""",10,3,3,"""BGGGG""",3,"""mummy, gummy, yummy""",True,"""mummy""",0,5,"""hard""","""dummy""",1,"""mummy""",1
"""yummy""","""mummy""","""nidsaetolr""","""umy""",10,3,3,"""YGGGG""",2,"""yummy, gummy""",True,"""yummy""",0,5,"""hard""","""mummy""",1,"""yummy""",1
"""yummy""","""yummy""","""""","""umy""",0,3,3,"""GGGGG""",1,"""gummy""",False,"""yummy""",0,5,"""hard""","""yummy""",1,"""yummy""",1


In [9]:
# Reduce to one row per `group`, then count groups per difficulty label,
# most common label first.
(
    simulating_tries_difficulty
    .unique(subset="group", keep="first")
    .get_column("difficulty")
    .value_counts()
    .sort(by="count", descending=True)
)

difficulty,count
str,u32
"""moderate""",5462
"""hard""",3041
"""very hard""",583
"""easy""",2


# Create dataframe of first guesses

In [10]:
# Collapse every `group` to its first row (input order preserved), which yields
# the opening-guess row of each group.
first_guess = (
    simulating_tries_difficulty
    .group_by("group", maintain_order=True)
    .agg(pl.all().first())
)

first_guess.head(n=5)

group,challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
u32,str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u8,str,str,u8,str,u8
0,"""yummy""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""fully, chord, power, offer, er…",False,"""loure""",5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
1,"""calyx""","""saint""","""ntis""","""a""",4,1,1,"""BGBBB""",495,"""cargo, labor, favor, large, ha…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
2,"""varas""","""saint""","""nit""","""sa""",3,2,1,"""YGBBB""",184,"""harsh, cause, wales, false, ja…",False,"""loure""",5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
3,"""bubba""","""saint""","""ntis""","""a""",4,1,0,"""BYBBB""",751,"""board, weary, cheap, grave, fe…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
4,"""lowes""","""saint""","""nait""","""s""",4,1,0,"""YBBBB""",440,"""press, house, close, fresh, mo…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2


In [11]:
# Mean of `tries` over the first-guess rows, rounded to two decimals.
avg_tries = round(first_guess.get_column("tries").mean(), ndigits=2)
avg_tries

[1;36m4.22[0m

# EDA

In [12]:
# Draw the difficulty-distribution chart (project helper) for the full joined table.
difficulty_distribution(simulating_tries_difficulty)

## Most frequent number of guesses

In [13]:
# How often each `tries` value occurs among the first-guess rows, most frequent first.
# A null `tries` is counted as its own row.
(
    first_guess
    .get_column("tries")
    .value_counts()
    .sort("count", descending=True)
)

tries,count
u8,u32
4.0,3384
5.0,2130
3.0,2078
6.0,911
,583
2.0,1
1.0,1


## Anagrams  

Only the most frequently occurring anagrams were considered, and only for root words remaining after the five-letter words were processed to remove plurals, names and places. As such, there won't be any anagrams for `leads`, as its root word is `lead`, which is a 4-letter word.  

However, if our challenge word is `leads`, or even its anagram `deals`, we can use their anagram `slade` to make a correct guess.

In [14]:
# Rows where the guess is an anagram of the challenge: no differing letters, all five
# letters in common, and the guess is not the challenge itself (so already-correct
# guesses are left out). The predicates passed to `filter` are AND-ed together.
anagrams = simulating_tries_difficulty.filter(
    pl.col("letter_differences") == "",
    pl.col("num_common_letters") == 5,
    pl.col("challenge") != pl.col("guess"),
)

anagrams

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""natis""","""saint""","""""","""saint""",0,5,1,"""YGYYY""",1,"""natis""",True,"""loure""",682,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""stain""","""saint""","""""","""saint""",0,5,1,"""GYYYY""",1,"""stain""",True,"""loure""",1880,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""antis""","""saint""","""""","""saint""",0,5,0,"""YYYYY""",1,"""antis""",True,"""loure""",4908,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""satin""","""saint""","""""","""saint""",0,5,2,"""GGYYY""",1,"""satin""",True,"""loure""",6584,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2


In [15]:
# Seeded generator so the randomly selected example games in this notebook are the
# same on every Restart & Run All. Change the seed to browse different examples.
rng = np.random.default_rng(seed=42)

In [16]:
# Pick one anagram game at random and render its rows as an HTML table, with
# `match_pattern` replaced by the output of the colouring helper.
anagrams_select_group = rng.choice(anagrams["group"], size=1, replace=False)[0]

HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == anagrams_select_group)
    .to_pandas()
    .assign(
        match_pattern=lambda games: color_pattern_matching(
            dataf=games,
            challenge_col="challenge",
            guess_col="guess",
        )
    )
    .loc[
        :,
        [
            "challenge", "guess", "match_pattern", "next_guess",
            "num_choices_after_guess", "possible_guesses", "difficulty",
        ]
    ]
    # escape=False so the markup produced for `match_pattern` is rendered, not shown as text
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,stain,saint,saint,loure,1,stain,moderate
1,stain,loure,loure,stain,1,stain,moderate
2,stain,stain,stain,stain,0,,moderate
3,stain,stain,stain,stain,0,,moderate
4,stain,stain,stain,stain,0,,moderate
5,stain,stain,stain,stain,0,,moderate


### Challenge words without `r` , `s` or `t`
These are some of the most frequently occurring letters. We can then see how our naive algorithm operates in coming to the correct guess.

In [17]:
# First-guess rows whose challenge word contains none of the letters r, s or t
# (regex alternation) and whose opening guess was not already the challenge.
challenge_words_without_r_s_t = (
    first_guess
    .filter(
        ~(pl.col("challenge").str.contains("r|s|t"))
        & ((pl.col("challenge") != (pl.col("guess"))))
    )
)

# Fixed seed so the displayed sample is identical on every re-run of the notebook.
challenge_words_without_r_s_t.sample(n=20, with_replacement=False, seed=42)

group,challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
u32,str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u8,str,str,u8,str,u8
4268,"""yogic""","""saint""","""nats""","""i""",4,1,0,"""BBYBB""",482,"""micro, oleic, humid, liver, fi…",False,"""loure""",4.0,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
5263,"""gambe""","""saint""","""ntis""","""a""",4,1,1,"""BGBBB""",495,"""large, maybe, value, badly, ea…",False,"""loure""",5.0,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
5160,"""codex""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""force, gooey, power, romeo, ju…",False,"""loure""",4.0,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
5327,"""gouge""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""occur, blood, model, wheel, fu…",False,"""loure""",4.0,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
3028,"""fonly""","""saint""","""atis""","""n""",4,1,0,"""BBBYB""",276,"""money, novel, knock, newly, gr…",False,"""loure""",5.0,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
3631,"""embox""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""lodge, model, fully, greek, le…",False,"""loure""",4.0,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
1289,"""baggy""","""saint""","""ntis""","""a""",4,1,1,"""BGBBB""",495,"""large, paper, value, early, ca…",False,"""loure""",6.0,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
6592,"""daine""","""saint""","""ts""","""ain""",2,3,3,"""BGGGB""",4,"""rainy, raine, faine, daine""",True,"""loure""",4.0,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
5100,"""dhobi""","""saint""","""nats""","""i""",4,1,0,"""BBYBB""",482,"""movie, yield, river, field, pi…",False,"""loure""",5.0,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
1189,"""mamee""","""saint""","""ntis""","""a""",4,1,1,"""BGBBB""",495,"""major, vague, value, valve, fa…",False,"""loure""",6.0,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2


In [18]:
# Difficulty-distribution chart restricted to challenge words that contain
# none of 'r', 's' or 't'.
difficulty_distribution(
    challenge_words_without_r_s_t
)

## Null values (Incomplete games)

In [19]:
# Rows with a null `tries`; this notebook treats those as incomplete games.
incomplete_games = simulating_tries_difficulty.filter(pl.col("tries").is_null())

In [20]:
# Preview the incomplete-game rows (first five).
incomplete_games.head(n=5)

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""scaly""","""saint""","""nit""","""sa""",3,2,1,"""GYBBB""",171,"""small, share, shall, space, sp…",False,"""loure""",11,,"""very hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""scaly""","""loure""","""nuieotr""","""sal""",7,3,1,"""YBBBB""",22,"""shall, small, scalp, scala, sp…",True,"""shall""",11,,"""very hard""","""loure, roule""",2,"""shall""",1
"""scaly""","""shall""","""nuieothr""","""sal""",8,3,2,"""GBGGY""",9,"""slack, scalp, scala, scaly, sc…",True,"""slack""",11,,"""very hard""","""shall""",1,"""slack""",1
"""scaly""","""slack""","""nukieothr""","""salc""",9,4,2,"""GYGYB""",4,"""scalp, scala, scaly, scald""",True,"""scalp""",11,,"""very hard""","""slack""",1,"""scalp, clasp""",2
"""scaly""","""scalp""","""nukipeothr""","""salc""",10,4,4,"""GGGGB""",3,"""scala, scald, scaly""",True,"""scala""",11,,"""very hard""","""scalp, clasp""",2,"""scala""",1


In [21]:
# Pick one incomplete game at random and render its rows as an HTML table, with
# `match_pattern` replaced by the output of the colouring helper.
null_select_group = rng.choice(incomplete_games["group"], size=1, replace=False)[0]

HTML(
    incomplete_games
    .filter(pl.col("group") == null_select_group)
    .to_pandas()
    .assign(
        match_pattern=lambda games: color_pattern_matching(
            dataf=games,
            challenge_col="challenge",
            guess_col="guess",
        )
    )
    .loc[
        :,
        [
            "challenge", "guess", "match_pattern", "next_guess",
            "num_choices_after_guess", "possible_guesses", "difficulty",
        ]
    ]
    # escape=False so the markup produced for `match_pattern` is rendered, not shown as text
    .to_html(escape=False)
)

Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,cabby,saint,saint,loure,495,"early, carry, major, badly, karma, value, happy, mayor, march, havoc",very hard
1,cabby,loure,loure,daddy,77,"daddy, happy, gamma, paddy, madam, mamma, jazzy, pappy, gamba, kappa",very hard
2,cabby,daddy,daddy,happy,29,"happy, mammy, jacky, baggy, gabby, pappy, cabby, gawky, jazzy, wacky",very hard
3,cabby,happy,happy,mammy,19,"mammy, baggy, jacky, gabby, wacky, gawky, jazzy, cabby, mawky, yabby",very hard
4,cabby,mammy,mammy,baggy,14,"baggy, jacky, gabby, wacky, gawky, jazzy, cabby, yabby, faggy, baffy",very hard
5,cabby,baggy,baggy,cabby,2,"cabby, yabby",very hard


In [22]:
# Same randomly selected incomplete game, narrowed to the columns needed to
# inspect how the candidate list shrinks guess by guess.
incomplete_games_check = incomplete_games.filter(
    pl.col("group") == null_select_group
).select(
    [
        "challenge",
        "guess",
        "match_pattern",
        "num_choices_after_guess",
        "possible_guesses",
        "challenge_in_possible_guesses",
        "next_guess",
    ]
)

incomplete_games_check

challenge,guess,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess
str,str,str,u16,str,bool,str
"""cabby""","""saint""","""BGBBB""",495,"""early, carry, major, badly, ka…",False,"""loure"""
"""cabby""","""loure""","""BBBBB""",77,"""daddy, happy, gamma, paddy, ma…",False,"""daddy"""
"""cabby""","""daddy""","""BGBBG""",29,"""happy, mammy, jacky, baggy, ga…",True,"""happy"""
"""cabby""","""happy""","""BGBBG""",19,"""mammy, baggy, jacky, gabby, wa…",True,"""mammy"""
"""cabby""","""mammy""","""BGBBG""",14,"""baggy, jacky, gabby, wacky, ga…",True,"""baggy"""
"""cabby""","""baggy""","""YGBBG""",2,"""cabby, yabby""",True,"""cabby"""


In [23]:
# Untruncated `possible_guesses` string from row 0 of the selected game.
incomplete_games_check.slice(0, 1).get_column("possible_guesses").item()

[32m'early, carry, major, badly, karma, value, happy, mayor, march, havoc'[0m

In [24]:
# Untruncated `possible_guesses` string from row 1 of the selected game.
incomplete_games_check.slice(1, 1).get_column("possible_guesses").item()

[32m'daddy, happy, gamma, paddy, madam, mamma, jazzy, pappy, gamba, kappa'[0m

In [25]:
# Untruncated `possible_guesses` string from row 2 of the selected game.
incomplete_games_check.slice(2, 1).get_column("possible_guesses").item()

[32m'happy, mammy, jacky, baggy, gabby, pappy, cabby, gawky, jazzy, wacky'[0m

In [26]:
# Untruncated `possible_guesses` string from row 3 of the selected game.
incomplete_games_check.slice(3, 1).get_column("possible_guesses").item()

[32m'mammy, baggy, jacky, gabby, wacky, gawky, jazzy, cabby, mawky, yabby'[0m

In [27]:
# Untruncated `possible_guesses` string from row 4 of the selected game.
incomplete_games_check.slice(4, 1).get_column("possible_guesses").item()

[32m'baggy, jacky, gabby, wacky, gawky, jazzy, cabby, yabby, faggy, baffy'[0m

In [28]:
# Untruncated `possible_guesses` string from row 5 of the selected game.
incomplete_games_check.slice(5, 1).get_column("possible_guesses").item()

[32m'cabby, yabby'[0m

### Plot incomplete games stats

In [29]:
# Per challenge word: the number of incomplete games (one per `group`), and that
# count as a percentage of the number of distinct opening guesses in `first_guess`.
incomplete_games_stats = (
    incomplete_games
    .select("challenge")
    .unique(keep="first")
    .join(
        incomplete_games
        .unique(subset="group", keep="first")
        .get_column("challenge")
        .value_counts(),
        left_on="challenge",
        right_on="challenge",
    )
    .rename({"count": "incomplete_games_count"})
    .with_columns(
        pl.col("incomplete_games_count").cast(pl.UInt8),
        (
            pl.col("incomplete_games_count")
            / first_guess.get_column("guess").n_unique()
            * 100
        )
        .round(2)
        .cast(pl.Float32)
        .alias("incomplete_games_pct"),
    )
    .sort("incomplete_games_pct", descending=True)
)

incomplete_games_stats.head(n=5)

challenge,incomplete_games_count,incomplete_games_pct
str,u8,f32
"""delly""",1,100.0
"""woozy""",1,100.0
"""scraw""",1,100.0
"""caese""",1,100.0
"""rakee""",1,100.0


In [30]:
# Count, per challenge word, the first-guess rows labelled "very hard",
# largest count first.
incomplete_games_count = (
    first_guess
    .filter(pl.col("difficulty") == "very hard")
    .group_by("challenge")
    .agg(pl.len().alias("incomplete_games_count"))
    .sort(by="incomplete_games_count", descending=True)
)

incomplete_games_count.head(n=5)

challenge,incomplete_games_count
str,u32
"""cavie""",1
"""ceric""",1
"""minny""",1
"""mutch""",1
"""pyoid""",1


In [31]:
# Chart the per-challenge incomplete-game statistics with the project plotting helper.
plot_most_difficult_words(dataf=incomplete_games_stats)

## Words never guessed

In [32]:
# Challenge words whose incomplete-game percentage is exactly 100 (first five shown).
(
    incomplete_games_stats
    .filter(pl.col("incomplete_games_pct").eq(100))
    .head(n=5)
)

challenge,incomplete_games_count,incomplete_games_pct
str,u8,f32
"""delly""",1,100.0
"""woozy""",1,100.0
"""scraw""",1,100.0
"""caese""",1,100.0
"""rakee""",1,100.0


# Challenge word statistics

In [33]:
# Per challenge word: modal `tries`, mean `tries`, and the percentage of rows whose
# `tries` is null. `mode()` can return several values, so the aggregated column is
# exploded into one row per modal value.
challenge_stats = (
    simulating_tries_difficulty
    .group_by("challenge")
    .agg(
        pl.col("tries").mode().cast(pl.UInt8).alias("tries_mode"),
        pl.col("tries").mean().cast(pl.Float32).round(3).alias("tries_mean"),
        (pl.col("tries").is_null().mean().cast(pl.Float32) * 100)
        .round(3)
        .alias("tries_null_pct"),
    )
    .explode("tries_mode")
    .sort(by="tries_null_pct", descending=True)
)

In [34]:
# Preview the challenge statistics (first five rows, highest null percentage first).
challenge_stats.head(n=5)

challenge,tries_mode,tries_mean,tries_null_pct
str,u8,f32,f32
"""cobby""",,,100.0
"""welke""",,,100.0
"""humpy""",,,100.0
"""hexer""",,,100.0
"""edger""",,,100.0


In [35]:
# Share of first-guess rows with a null `tries`, printed as a percentage.
print(f"Overall incompletion rate: {first_guess['tries'].null_count() / first_guess.height * 100:.2f}%")

Overall incompletion rate: 6.42%


## Retrieve the 2nd row of every group of 6 rows.

### Determine the average number of choices remaining after the second guess. This will tell us how effective the two guess strategy is at solving the challenge word.

In [36]:
# Take the row at position 1 of every consecutive run of 6 rows (the second guess),
# using a temporary row index. Rows whose guess is still 'saint' are dropped, which
# removes the game where 'saint' as the first guess was already correct.
second_guess_df = (
    simulating_tries_difficulty
    .with_row_index("row_num")
    .filter(
        pl.col("row_num") % 6 == 1,
        pl.col("guess") != "saint",
    )
    .drop("row_num")
)

In [37]:
# Preview the second-guess rows (first five).
second_guess_df.head(n=5)

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""yummy""","""loure""","""nisaetolr""","""u""",9,1,0,"""BBYBB""",56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2,"""dummy""",1.0
"""calyx""","""loure""","""nuisetor""","""al""",8,2,1,"""YBBBB""",44,"""palmy, papal, wally, badly, ma…",True,"""palmy""",1,4,"""moderate""","""loure, roule""",2,,
"""varas""","""loure""","""nuietol""","""sar""",7,3,1,"""BBBYB""",21,"""harsh, marsh, rajas, warps, ra…",True,"""harsh""",2,5,"""hard""","""loure, roule""",2,"""harsh""",1.0
"""bubba""","""loure""","""nisetolr""","""au""",8,2,0,"""BBYBB""",21,"""quack, yucca, bubba, guava, gu…",True,"""quack""",3,4,"""moderate""","""loure, roule""",2,"""quack""",1.0
"""lowes""","""loure""","""nuiatr""","""sloe""",6,4,2,"""GGBBY""",4,"""loess, lowes, losel, lomes""",True,"""loess""",4,4,"""moderate""","""loure, roule""",2,"""loess""",1.0


In [38]:
# Same selection without a helper column: every 6th row starting at offset 1,
# then drop rows whose guess is still 'saint' (first guess was already correct).
second_guess_df_2 = (
    simulating_tries_difficulty
    .gather_every(6, offset=1)
    .filter(pl.col("guess").ne("saint"))
)

second_guess_df_2.head(n=5)

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""yummy""","""loure""","""nisaetolr""","""u""",9,1,0,"""BBYBB""",56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2,"""dummy""",1.0
"""calyx""","""loure""","""nuisetor""","""al""",8,2,1,"""YBBBB""",44,"""palmy, papal, wally, badly, ma…",True,"""palmy""",1,4,"""moderate""","""loure, roule""",2,,
"""varas""","""loure""","""nuietol""","""sar""",7,3,1,"""BBBYB""",21,"""harsh, marsh, rajas, warps, ra…",True,"""harsh""",2,5,"""hard""","""loure, roule""",2,"""harsh""",1.0
"""bubba""","""loure""","""nisetolr""","""au""",8,2,0,"""BBYBB""",21,"""quack, yucca, bubba, guava, gu…",True,"""quack""",3,4,"""moderate""","""loure, roule""",2,"""quack""",1.0
"""lowes""","""loure""","""nuiatr""","""sloe""",6,4,2,"""GGBBY""",4,"""loess, lowes, losel, lomes""",True,"""loess""",4,4,"""moderate""","""loure, roule""",2,"""loess""",1.0


In [39]:
# Mean number of candidate words remaining after the second guess.
print(f'Avg num choices after 2nd guess: {round(second_guess_df_2.get_column("num_choices_after_guess").mean(), 2)}')

Avg num choices after 2nd guess: 15.68


### Greatest num of choices left after second guess

In [40]:
# Largest number of candidates left after the second guess.
# `Series.max()` reads the maximum directly (no full-frame sort) and skips nulls,
# so a null in the column cannot end up as the result.
max_num_choices_after_2nd_guess = second_guess_df_2["num_choices_after_guess"].max()

max_num_choices_after_2nd_guess

[1;36m81[0m

In [41]:
# Distinct challenge words that still have the maximum number of candidates
# after the second guess, as a NumPy array.
(
    second_guess_df_2
    .filter(pl.col("num_choices_after_guess").eq(max_num_choices_after_2nd_guess))
    .get_column("challenge")
    .unique()
    .to_numpy()
)


[1;35marray[0m[1m([0m[1m[[0m[32m'boofy'[0m, [32m'goofy'[0m, [32m'poofy'[0m, [32m'bombo'[0m, [32m'comfy'[0m, [32m'cocky'[0m, [32m'cobby'[0m,
       [32m'poboy'[0m, [32m'gombo'[0m, [32m'pooch'[0m, [32m'gobbo'[0m, [32m'mooch'[0m, [32m'coppy'[0m, [32m'dodgy'[0m,
       [32m'compo'[0m, [32m'boffo'[0m, [32m'oomph'[0m, [32m'comby'[0m, [32m'bobby'[0m, [32m'boomy'[0m, [32m'gooby'[0m,
       [32m'woody'[0m, [32m'boggy'[0m, [32m'mochy'[0m, [32m'cocco'[0m, [32m'mommy'[0m, [32m'pommy'[0m, [32m'poddy'[0m,
       [32m'yobbo'[0m, [32m'foody'[0m, [32m'mobby'[0m, [32m'dohyo'[0m, [32m'doggo'[0m, [32m'cooch'[0m, [32m'howdy'[0m,
       [32m'doozy'[0m, [32m'hobby'[0m, [32m'vozhd'[0m, [32m'moggy'[0m, [32m'podgy'[0m, [32m'woofy'[0m, [32m'doggy'[0m,
       [32m'foggy'[0m, [32m'commo'[0m, [32m'moody'[0m, [32m'dobby'[0m, [32m'jocko'[0m, [32m'coomy'[0m, [32m'boogy'[0m,
       [32m'booky'[0m, [32m'zocco'[0m

### How often on average is the challenge word in the possible choices after the second guess

In [42]:
# Percentage of second-guess rows whose candidate list contains the challenge word.
print(f'Avg times challenge in possible guesses: {round(second_guess_df_2.get_column("challenge_in_possible_guesses").mean() * 100, 2)}%')

Avg times challenge in possible guesses: 75.72%


In [43]:
# Distinct `group` ids of games whose challenge word ends in "und", as a NumPy array.
endswith_und = (
    simulating_tries_difficulty
    .filter(pl.col("challenge").str.ends_with("und"))
    .get_column("group")
    .unique()
    .to_numpy()
)

endswith_und

[1;35marray[0m[1m([0m[1m[[0m[1;36m2248[0m, [1;36m2404[0m, [1;36m4717[0m, [1;36m5900[0m, [1;36m5990[0m, [1;36m6233[0m, [1;36m6736[0m, [1;36m6928[0m, [1;36m7799[0m[1m][0m, [33mdtype[0m=[35muint32[0m[1m)[0m

In [44]:
# Pick one "-und" game at random and render its rows as an HTML table, with
# `match_pattern` replaced by the output of the colouring helper.
# `rng.choice(..., 1)` returns a one-element array; take element 0 so `group` is
# compared against a scalar, as the earlier example cells in this notebook do.
endswith_und_group = rng.choice(endswith_und, 1, replace=False)[0]
HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == endswith_und_group)
    .to_pandas()
    .assign(
        match_pattern = lambda df_:
        color_pattern_matching(
            dataf=df_,
            challenge_col="challenge",
            guess_col="guess"
        )
    )
    [[
        "challenge", "guess", "match_pattern", "next_guess",
        "num_choices_after_guess", "possible_guesses", "difficulty",
    ]]
    # escape=False so the markup produced for `match_pattern` is rendered, not shown as text
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,mound,saint,saint,loure,111,"young, prone, ebony, wrong, jenny, pound, blend, donne, funny, round",hard
1,mound,loure,loure,young,6,"young, wound, pound, mound, hound, nouny",hard
2,mound,young,young,pound,4,"pound, wound, mound, hound",hard
3,mound,pound,pound,wound,3,"wound, mound, hound",hard
4,mound,wound,wound,mound,2,"mound, hound",hard
5,mound,mound,mound,mound,0,,hard


In [45]:
# Distinct `group` ids of games whose challenge word ends in "ght", as a NumPy array.
endswith_ght = (
    simulating_tries_difficulty
    .filter(pl.col("challenge").str.ends_with("ght"))
    .get_column("group")
    .unique()
    .to_numpy()
)

endswith_ght


[1;35marray[0m[1m([0m[1m[[0m [1;36m143[0m,  [1;36m616[0m,  [1;36m975[0m, [1;36m1406[0m, [1;36m1916[0m, [1;36m2727[0m, [1;36m2804[0m, [1;36m4154[0m, [1;36m4520[0m, [1;36m4531[0m, [1;36m5828[0m,
       [1;36m5960[0m, [1;36m6224[0m, [1;36m6416[0m[1m][0m, [33mdtype[0m=[35muint32[0m[1m)[0m

In [46]:
# Pick one "-ght" game at random and render its rows as an HTML table, with
# `match_pattern` replaced by the output of the colouring helper.
# `rng.choice(..., 1)` returns a one-element array; take element 0 so `group` is
# compared against a scalar, as the earlier example cells in this notebook do.
endswith_ght_group = rng.choice(endswith_ght, 1, replace=False)[0]
HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == endswith_ght_group)
    .to_pandas()
    .assign(
        match_pattern = lambda df_:
        color_pattern_matching(
            dataf=df_,
            challenge_col="challenge",
            guess_col="guess"
        )
    )
    [[
        "challenge", "guess", "match_pattern", "next_guess",
        "num_choices_after_guess", "possible_guesses", "difficulty",
    ]]
    # escape=False so the markup produced for `match_pattern` is rendered, not shown as text
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,bight,saint,saint,loure,68,"light, right, fruit, limit, fight, vomit, debit, tight, pilot, bigot",hard
1,bight,loure,loure,fight,16,"fight, tight, digit, wight, bight, dixit, hight, pipit, dicht, micht",hard
2,bight,fight,fight,tight,7,"tight, wight, bight, hight, dight, kight, pight",hard
3,bight,tight,tight,wight,6,"wight, bight, hight, dight, kight, pight",hard
4,bight,wight,wight,bight,5,"bight, hight, dight, kight, pight",hard
5,bight,bight,bight,bight,0,,hard


In [47]:
# Distinct `group` ids of games whose challenge is one of the listed "-aunt" words.
endswith_unt = (
    simulating_tries_difficulty
    .filter(
        pl.col("challenge").is_in(
            ["jaunt", "vaunt", "daunt", "haunt", "gaunt", "taunt"]
        )
    )
    .get_column("group")
    .unique()
)

In [48]:
# Pick one of the "-aunt" games at random and render its rows as an HTML table, with
# `match_pattern` replaced by the output of the colouring helper.
# `rng.choice(..., 1)` returns a one-element array; take element 0 so `group` is
# compared against a scalar, as the earlier example cells in this notebook do.
endswith_unt_group = rng.choice(endswith_unt, 1, replace=False)[0]
HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == endswith_unt_group)
    .to_pandas()
    .assign(
        match_pattern = lambda df_:
        color_pattern_matching(
            dataf=df_,
            challenge_col="challenge",
            guess_col="guess"
        )
    )
    [[
        "challenge", "guess", "match_pattern", "next_guess",
        "num_choices_after_guess", "possible_guesses", "difficulty",
    ]]
    # escape=False so the markup produced for `match_pattern` is rendered, not shown as text
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,taunt,saint,saint,loure,8,"haunt, gaunt, taunt, jaunt, daunt, vaunt, naunt, daynt",hard
1,taunt,loure,loure,haunt,7,"haunt, gaunt, taunt, jaunt, daunt, vaunt, naunt",hard
2,taunt,haunt,haunt,gaunt,6,"gaunt, taunt, jaunt, daunt, vaunt, naunt",hard
3,taunt,gaunt,gaunt,taunt,5,"taunt, jaunt, daunt, vaunt, naunt",hard
4,taunt,taunt,taunt,taunt,4,"jaunt, daunt, vaunt, naunt",hard
5,taunt,taunt,taunt,taunt,4,"jaunt, daunt, vaunt, naunt",hard


In [49]:
# Distinct `group` ids of games whose challenge word ends in "are", as a NumPy array.
endswith_are = (
    simulating_tries_difficulty
    .filter(pl.col("challenge").str.ends_with("are"))
    .get_column("group")
    .unique()
    .to_numpy()
)

endswith_are


[1;35marray[0m[1m([0m[1m[[0m [1;36m473[0m,  [1;36m591[0m,  [1;36m704[0m, [1;36m1014[0m, [1;36m1119[0m, [1;36m1187[0m, [1;36m1428[0m, [1;36m1758[0m, [1;36m1777[0m, [1;36m2362[0m, [1;36m3102[0m,
       [1;36m3181[0m, [1;36m3279[0m, [1;36m3378[0m, [1;36m4192[0m, [1;36m4239[0m, [1;36m4501[0m, [1;36m4784[0m, [1;36m4856[0m, [1;36m5291[0m, [1;36m5318[0m, [1;36m5553[0m,
       [1;36m6860[0m, [1;36m7005[0m, [1;36m7091[0m, [1;36m7643[0m, [1;36m8475[0m[1m][0m, [33mdtype[0m=[35muint32[0m[1m)[0m

In [50]:
# Pick one "-are" game at random and render its rows as an HTML table, with
# `match_pattern` replaced by the output of the colouring helper.
# `rng.choice(..., 1)` returns a one-element array; take element 0 so `group` is
# compared against a scalar, as the earlier example cells in this notebook do.
endswith_are_group = rng.choice(endswith_are, 1, replace=False)[0]
HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == endswith_are_group)
    .to_pandas()
    .assign(
        match_pattern = lambda df_:
        color_pattern_matching(
            dataf=df_,
            challenge_col="challenge",
            guess_col="guess"
        )
    )
    [[
        "challenge", "guess", "match_pattern", "next_guess",
        "num_choices_after_guess", "possible_guesses", "difficulty",
    ]]
    # escape=False so the markup produced for `match_pattern` is rendered, not shown as text
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,heare,saint,saint,loure,751,"equal, royal, moral, clear, grade, board, local, black, legal, album",hard
1,heare,loure,loure,aware,14,"aware, feare, beare, heare, deare, phare, geare, crare, aygre, whare",hard
2,heare,aware,aware,feare,10,"feare, beare, heare, deare, phare, geare, crare, chare, meare, peare",hard
3,heare,feare,feare,beare,9,"beare, heare, deare, phare, geare, crare, chare, meare, peare",hard
4,heare,beare,beare,heare,8,"heare, deare, phare, geare, crare, chare, meare, peare",hard
5,heare,heare,heare,heare,0,,hard
