https://www.wired.com/story/best-wordle-tips/

In [None]:
%load_ext kedro.ipython

In [None]:
%reload_kedro

In [3]:
# Extend the module search path so project code can be imported from this notebook.
# NOTE(review): the later `from projectwordle.utils import ...` resolves against the
# *parent* of the package directory, whereas this entry points at the package
# directory itself — confirm whether "../../src" was intended (the kedro extension
# may already be providing the correct path).
import sys
sys.path.append("../../src/projectwordle")

In [4]:
import polars as pl
import numpy as np
from IPython.display import HTML
from projectwordle.utils import (
    color_pattern_matching,
    difficulty_distribution,
)

# Show up to 50 rows when a polars frame is displayed.  The documented setter is
# used rather than instantiating `pl.Config(...)`, and the trailing semicolon stops
# the notebook from echoing the return value as cell output.
pl.Config.set_tbl_rows(50);

[1m<[0m[1;95mpolars.config.Config[0m[39m object at [0m[1;36m0x1469d3490[0m[1m>[0m

# Load Data

In [None]:
# Load both input datasets from the data catalog.
# NOTE(review): `catalog` is not defined in this notebook; presumably it is injected
# by the kedro.ipython extension loaded at the top — confirm.
five_letter_word_anagrams, simulating_openers = (
    catalog.load("five_letter_words_anagrams"),
    catalog.load("simulating_openers"),
)

In [6]:
# Per-guess simulation rows, left-joined with the anagram table on `next_guess`.
# Both sides carry an `anagram_num` column, so the right-hand copy is expected to
# appear with a suffix (polars' default is `_right`).
simulating_tries_difficulty = simulating_openers.select(
    [
        "challenge",
        "guess",
        "letter_differences",
        "common_letters",
        "num_diff_letters",
        "num_common_letters",
        "num_matching_index",
        "match_pattern",
        "num_choices_after_guess",
        "possible_guesses",
        "challenge_in_possible_guesses",
        "next_guess",
        "group",
        "tries",
        "difficulty",
        "guess_word_anagrams",
        "anagram_num",
    ]
).join(
    other=five_letter_word_anagrams.select(["words", "anagrams", "anagram_num"]),
    how="left",
    left_on="next_guess",
    right_on="words",
    coalesce=True,
)

In [7]:
# Preview the first six rows of the loaded simulation output.
simulating_openers.head(6)

index,challenge,guess,match_pattern,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num
u32,str,str,str,str,str,u8,u8,u8,u16,str,bool,str,u32,u8,str,str,u8
0,"""yummy""","""saint""","""BBBBB""","""nisat""","""""",5,0,0,1222,"""fully, chord, power, offer, er…",False,"""loure""",0,5,"""hard""","""saint, stain, satin, antis, na…",5
1,"""yummy""","""loure""","""BBYBB""","""nisaetolr""","""u""",9,1,0,56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2
2,"""yummy""","""dummy""","""BGGGG""","""nidsaetolr""","""umy""",10,3,3,3,"""mummy, gummy, yummy""",True,"""mummy""",0,5,"""hard""","""dummy""",1
3,"""yummy""","""mummy""","""YGGGG""","""nidsaetolr""","""umy""",10,3,3,2,"""yummy, gummy""",True,"""yummy""",0,5,"""hard""","""mummy""",1
4,"""yummy""","""yummy""","""GGGGG""","""""","""umy""",0,3,3,1,"""gummy""",False,"""yummy""",0,5,"""hard""","""yummy""",1
5,"""yummy""","""yummy""","""GGGGG""","""""","""umy""",0,3,3,1,"""gummy""",False,"""yummy""",0,5,"""hard""","""yummy""",1


In [8]:
# Preview the joined frame (selected simulation columns plus the anagram columns).
simulating_tries_difficulty.head()

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""yummy""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""fully, chord, power, offer, er…",False,"""loure""",0,5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""yummy""","""loure""","""nisaetolr""","""u""",9,1,0,"""BBYBB""",56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2,"""dummy""",1
"""yummy""","""dummy""","""nidsaetolr""","""umy""",10,3,3,"""BGGGG""",3,"""mummy, gummy, yummy""",True,"""mummy""",0,5,"""hard""","""dummy""",1,"""mummy""",1
"""yummy""","""mummy""","""nidsaetolr""","""umy""",10,3,3,"""YGGGG""",2,"""yummy, gummy""",True,"""yummy""",0,5,"""hard""","""mummy""",1,"""yummy""",1
"""yummy""","""yummy""","""""","""umy""",0,3,3,"""GGGGG""",1,"""gummy""",False,"""yummy""",0,5,"""hard""","""yummy""",1,"""yummy""",1


In [9]:
# Tally difficulty labels, counting each `group` only once.
(
    simulating_tries_difficulty
    .unique(subset="group", keep="first")
    .get_column("difficulty")
    .value_counts()
    .sort(by="count", descending=True)
)

difficulty,count
str,u32
"""moderate""",5462
"""hard""",3041
"""very hard""",583
"""easy""",2


# Create dataframe of first guesses

In [10]:
# Group by the "group" column and keep the first row of each group;
# maintain_order=True keeps the groups in their original order of appearance.
first_guess = (
    simulating_tries_difficulty
    .group_by("group", maintain_order=True)
    .first()
)

first_guess.head()

group,challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
u32,str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u8,str,str,u8,str,u8
0,"""yummy""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""fully, chord, power, offer, er…",False,"""loure""",5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
1,"""calyx""","""saint""","""ntis""","""a""",4,1,1,"""BGBBB""",495,"""cargo, labor, favor, large, ha…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
2,"""varas""","""saint""","""nit""","""sa""",3,2,1,"""YGBBB""",184,"""harsh, cause, wales, false, ja…",False,"""loure""",5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
3,"""bubba""","""saint""","""ntis""","""a""",4,1,0,"""BYBBB""",751,"""board, weary, cheap, grave, fe…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
4,"""lowes""","""saint""","""nait""","""s""",4,1,0,"""YBBBB""",440,"""press, house, close, fresh, mo…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2


In [11]:
# Mean of `tries` across the first-guess rows, rounded to two decimals.
avg_tries = round(first_guess.get_column("tries").mean(), 2)
avg_tries

[1;36m4.22[0m

# EDA

In [12]:
# Difficulty distribution over the full per-guess simulation frame
difficulty_distribution(simulating_tries_difficulty)

## Most frequent number of guesses

In [13]:
# How often each `tries` value occurs, most frequent first.
(
    first_guess
    .get_column("tries")
    .value_counts()
    .sort("count", descending=True)
)

tries,count
u8,u32
4.0,3384
5.0,2130
3.0,2078
6.0,911
,583
2.0,1
1.0,1


## Anagrams  

Only the most frequently occurring anagrams were considered for root words, after the five-letter words were processed to remove plurals, names and places. As such, there won't be any anagrams for `leads`, as its root word is `lead`, which is a 4-letter word.  

However, if our challenge word is `leads`, or even its anagram `deals`, we can use their anagram `slade` to make a correct guess.

In [14]:
# Rows where the guess is an anagram of the challenge: no differing letters, all
# five letters in common, but not the challenge word itself.  Excluding exact
# first-time hits lets us see how quickly the solver converges on the challenge.
anagrams = simulating_tries_difficulty.filter(
    pl.col("letter_differences") == "",
    pl.col("num_common_letters") == 5,
    pl.col("challenge") != pl.col("guess"),
)

anagrams

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""natis""","""saint""","""""","""saint""",0,5,1,"""YGYYY""",1,"""natis""",True,"""loure""",682,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""stain""","""saint""","""""","""saint""",0,5,1,"""GYYYY""",1,"""stain""",True,"""loure""",1880,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""antis""","""saint""","""""","""saint""",0,5,0,"""YYYYY""",1,"""antis""",True,"""loure""",4908,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""satin""","""saint""","""""","""saint""",0,5,2,"""GGYYY""",1,"""satin""",True,"""loure""",6584,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2


In [15]:
# Render one anagram game as an HTML table.
# The group is picked with a seeded generator so that Restart & Run All always
# renders the same game instead of a different one on every run.
anagrams_select_group = (
    np.random.default_rng(seed=42)
    .choice(anagrams["group"].to_numpy(), size=1, replace=False)[0]
)

HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == anagrams_select_group)
    .to_pandas()
    .assign(
        # Replace the raw pattern string with the helper's rendering.
        # NOTE(review): presumably HTML markup, hence escape=False below — confirm.
        match_pattern = lambda df_:
        color_pattern_matching(
            dataf=df_,
            challenge_col="challenge",
            guess_col="guess"
        )
    )
    [[
        "challenge", "guess", "match_pattern", "next_guess",
        "num_choices_after_guess", "possible_guesses", "difficulty",
    ]]
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,satin,saint,saint,loure,1,satin,moderate
1,satin,loure,loure,satin,1,satin,moderate
2,satin,satin,satin,satin,0,,moderate
3,satin,satin,satin,satin,0,,moderate
4,satin,satin,satin,satin,0,,moderate
5,satin,satin,satin,satin,0,,moderate


### Challenge words without `r` , `s` or `t`
These are some of the most frequently occurring letters. We can then see how our naive algorithm operates in coming to the correct guess.

In [16]:
# First-guess rows whose challenge word contains none of 'r', 's', 't' and was not
# solved by the opening guess itself.
challenge_words_without_r_s_t = (
    first_guess
    .filter(
        ~(pl.col("challenge").str.contains("r|s|t"))
        & (pl.col("challenge") != pl.col("guess"))
    )
)

# Fixed seed so the displayed sample is the same on every Restart & Run All.
challenge_words_without_r_s_t.sample(n=20, with_replacement=False, seed=42)

group,challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
u32,str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u8,str,str,u8,str,u8
2025,"""bicep""","""saint""","""nats""","""i""",4,1,0,"""BBYBB""",482,"""bible, pixel, river, liege, pi…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
8530,"""pawaw""","""saint""","""ntis""","""a""",4,1,1,"""BGBBB""",495,"""rally, value, early, march, la…",False,"""loure""",6,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
1839,"""gompa""","""saint""","""ntis""","""a""",4,1,0,"""BYBBB""",751,"""leave, realm, guard, glare, br…",False,"""loure""",5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
8308,"""lagan""","""saint""","""tis""","""an""",3,2,1,"""BGBYB""",149,"""range, canon, handy, canal, ca…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
8839,"""knave""","""saint""","""tis""","""an""",3,2,0,"""BYBYB""",178,"""urban, human, woman, clean, ro…",False,"""loure""",3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
3504,"""kinky""","""saint""","""ats""","""in""",3,2,0,"""BBYYB""",204,"""minor, inner, begin, fungi, in…",False,"""loure""",6,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
1051,"""fling""","""saint""","""ats""","""in""",3,2,2,"""BBGGB""",42,"""bring, blind, drink, urine, rh…",False,"""loure""",5,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
6804,"""comae""","""saint""","""ntis""","""a""",4,1,0,"""BYBBB""",751,"""brace, grave, board, black, op…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
557,"""padma""","""saint""","""ntis""","""a""",4,1,1,"""BGBBB""",495,"""harry, papal, marry, early, wa…",False,"""loure""",4,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
795,"""gelee""","""saint""","""nisat""","""""",5,0,0,"""BBBBB""",1222,"""cover, humor, level, model, fu…",False,"""loure""",6,"""hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2


In [17]:
# Plot the difficulty distribution of challenge words that contain no 'r', 's' or 't'
difficulty_distribution(challenge_words_without_r_s_t)

## Null values (Incomplete games)

In [18]:
# Keep only rows with a null `tries` value (this section treats them as
# incomplete games).
incomplete_games = simulating_tries_difficulty.filter(pl.col("tries").is_null())

In [19]:
# Preview the rows that belong to games with a null `tries` value.
incomplete_games.head()

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""scaly""","""saint""","""nit""","""sa""",3,2,1,"""GYBBB""",171,"""small, share, shall, space, sp…",False,"""loure""",11,,"""very hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""scaly""","""loure""","""nuieotr""","""sal""",7,3,1,"""YBBBB""",22,"""shall, small, scalp, scala, sp…",True,"""shall""",11,,"""very hard""","""loure, roule""",2,"""shall""",1
"""scaly""","""shall""","""nuieothr""","""sal""",8,3,2,"""GBGGY""",9,"""slack, scalp, scala, scaly, sc…",True,"""slack""",11,,"""very hard""","""shall""",1,"""slack""",1
"""scaly""","""slack""","""nukieothr""","""salc""",9,4,2,"""GYGYB""",4,"""scalp, scala, scaly, scald""",True,"""scalp""",11,,"""very hard""","""slack""",1,"""scalp, clasp""",2
"""scaly""","""scalp""","""nukipeothr""","""salc""",10,4,4,"""GGGGB""",3,"""scala, scald, scaly""",True,"""scala""",11,,"""very hard""","""scalp, clasp""",2,"""scala""",1


In [20]:
# Pick one incomplete game and render it as an HTML table.
# A seeded generator keeps the choice stable across Restart & Run All; the cells
# that follow inspect this same group row by row, so it must not change between runs.
null_select_group = (
    np.random.default_rng(seed=42)
    .choice(incomplete_games["group"].to_numpy(), size=1, replace=False)[0]
)

HTML(
    incomplete_games
    .filter(pl.col("group") == null_select_group)
    .to_pandas()
    .assign(
        # Replace the raw pattern string with the helper's rendering.
        # NOTE(review): presumably HTML markup, hence escape=False below — confirm.
        match_pattern = lambda df_:
        color_pattern_matching(
            dataf=df_,
            challenge_col="challenge",
            guess_col="guess"
        )
    )
    [[
        "challenge", "guess", "match_pattern", "next_guess",
        "num_choices_after_guess", "possible_guesses", "difficulty",
    ]]
    .to_html(escape=False)
)

Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,wussy,saint,saint,loure,440,"bosom, close, jesus, house, press, flush, moses, flesh, verse, horse",very hard
1,wussy,loure,loure,dusky,52,"dusky, husky, bushy, humus, musky, mucus, cusps, pussy, busby, mushy",very hard
2,wussy,dusky,dusky,bushy,13,"bushy, pussy, fussy, busby, mushy, pushy, hussy, mussy, cushy, wussy",very hard
3,wussy,bushy,bushy,pussy,5,"pussy, fussy, mussy, wussy, gussy",very hard
4,wussy,pussy,pussy,fussy,4,"fussy, mussy, wussy, gussy",very hard
5,wussy,fussy,fussy,mussy,3,"mussy, wussy, gussy",very hard


In [21]:
# Narrow the selected incomplete game down to the columns needed to follow how the
# candidate list shrinks from guess to guess.
incomplete_games_check = incomplete_games.filter(
    pl.col("group") == null_select_group
).select(
    [
        "challenge",
        "guess",
        "match_pattern",
        "num_choices_after_guess",
        "possible_guesses",
        "challenge_in_possible_guesses",
        "next_guess",
    ]
)

incomplete_games_check

challenge,guess,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess
str,str,str,u16,str,bool,str
"""wussy""","""saint""","""YBBBB""",440,"""bosom, close, jesus, house, pr…",False,"""loure"""
"""wussy""","""loure""","""BBYBB""",52,"""dusky, husky, bushy, humus, mu…",False,"""dusky"""
"""wussy""","""dusky""","""BGGBG""",13,"""bushy, pussy, fussy, busby, mu…",True,"""bushy"""
"""wussy""","""bushy""","""BGGBG""",5,"""pussy, fussy, mussy, wussy, gu…",True,"""pussy"""
"""wussy""","""pussy""","""BGGGG""",4,"""fussy, mussy, wussy, gussy""",True,"""fussy"""
"""wussy""","""fussy""","""BGGGG""",3,"""mussy, wussy, gussy""",True,"""mussy"""


In [22]:
# Full `possible_guesses` string for the first row (index 0) of the selected game.
incomplete_games_check.item(0, "possible_guesses")

[32m'bosom, close, jesus, house, press, flush, moses, flesh, verse, horse'[0m

In [23]:
# Full `possible_guesses` string for the second row (index 1) of the selected game.
incomplete_games_check.item(1, "possible_guesses")

[32m'dusky, husky, bushy, humus, musky, mucus, cusps, pussy, busby, mushy'[0m

In [24]:
# Full `possible_guesses` string for the third row (index 2) of the selected game.
incomplete_games_check.item(2, "possible_guesses")

[32m'bushy, pussy, fussy, busby, mushy, pushy, hussy, mussy, cushy, wussy'[0m

In [25]:
# Full `possible_guesses` string for the fourth row (index 3) of the selected game.
incomplete_games_check.item(3, "possible_guesses")

[32m'pussy, fussy, mussy, wussy, gussy'[0m

In [26]:
# Full `possible_guesses` string for the fifth row (index 4) of the selected game.
incomplete_games_check.item(4, "possible_guesses")

[32m'fussy, mussy, wussy, gussy'[0m

In [27]:
# Full `possible_guesses` string for the sixth row (index 5) of the selected game.
incomplete_games_check.item(5, "possible_guesses")

[32m'mussy, wussy, gussy'[0m

# Challenge word statistics

In [28]:
# Per-challenge summary of `tries`: mode, mean, and the percentage of null values.
# `mode()` yields a list per group, so it is exploded into one row per modal value;
# the result is ordered with the highest null percentage first.
challenge_stats = (
    simulating_tries_difficulty
    .group_by("challenge")
    .agg(
        [
            pl.col("tries").mode().cast(pl.UInt8).alias("tries_mode"),
            pl.col("tries").mean().cast(pl.Float32).round(3).alias("tries_mean"),
            (pl.col("tries").is_null().mean().cast(pl.Float32) * 100)
            .round(3)
            .alias("tries_null_pct"),
        ]
    )
    .explode("tries_mode")
    .sort(by="tries_null_pct", descending=True)
)

In [29]:
# Challenges with the highest share of null `tries` appear first (sorted above).
challenge_stats.head()

challenge,tries_mode,tries_mean,tries_null_pct
str,u8,f32,f32
"""taxer""",,,100.0
"""canty""",,,100.0
"""pudus""",,,100.0
"""cardy""",,,100.0
"""polys""",,,100.0


In [30]:
# Share of first-guess rows with a null `tries`, reported as a percentage.
print(
    "Overall incompletion rate: "
    f"{first_guess['tries'].null_count() / first_guess.height * 100:.2f}%"
)

Overall incompletion rate: 6.42%


## Retrieve the 2nd row of every group of 6 rows.

### Determine the average number of choices remaining after the second guess. This will tell us how effective the two guess strategy is at solving the challenge word.

In [31]:
# Take the 2nd row out of every consecutive block of six rows (row index % 6 == 1),
# skipping rows whose guess is still 'saint', i.e. games where 'saint' as the first
# guess was already correct.
second_guess_df = (
    simulating_tries_difficulty
    .with_row_index("row_num")
    .filter(
        pl.col("row_num") % 6 == 1,
        pl.col("guess") != "saint",
    )
    .drop("row_num")
)

In [32]:
# Preview the second-guess rows selected via the row-index filter.
second_guess_df.head()

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""yummy""","""loure""","""nisaetolr""","""u""",9,1,0,"""BBYBB""",56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2,"""dummy""",1.0
"""calyx""","""loure""","""nuisetor""","""al""",8,2,1,"""YBBBB""",44,"""palmy, papal, wally, badly, ma…",True,"""palmy""",1,4,"""moderate""","""loure, roule""",2,,
"""varas""","""loure""","""nuietol""","""sar""",7,3,1,"""BBBYB""",21,"""harsh, marsh, rajas, warps, ra…",True,"""harsh""",2,5,"""hard""","""loure, roule""",2,"""harsh""",1.0
"""bubba""","""loure""","""nisetolr""","""au""",8,2,0,"""BBYBB""",21,"""quack, yucca, bubba, guava, gu…",True,"""quack""",3,4,"""moderate""","""loure, roule""",2,"""quack""",1.0
"""lowes""","""loure""","""nuiatr""","""sloe""",6,4,2,"""GGBBY""",4,"""loess, lowes, losel, lomes""",True,"""loess""",4,4,"""moderate""","""loure, roule""",2,"""loess""",1.0


In [33]:
# Same selection without a helper index column: every 6th row starting at offset 1,
# minus rows where 'saint' as the first guess was already correct.
second_guess_df_2 = simulating_tries_difficulty.gather_every(6, offset=1).filter(
    pl.col("guess") != "saint"
)

second_guess_df_2.head()

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""yummy""","""loure""","""nisaetolr""","""u""",9,1,0,"""BBYBB""",56,"""dummy, fuzzy, muddy, puppy, bu…",False,"""dummy""",0,5,"""hard""","""loure, roule""",2,"""dummy""",1.0
"""calyx""","""loure""","""nuisetor""","""al""",8,2,1,"""YBBBB""",44,"""palmy, papal, wally, badly, ma…",True,"""palmy""",1,4,"""moderate""","""loure, roule""",2,,
"""varas""","""loure""","""nuietol""","""sar""",7,3,1,"""BBBYB""",21,"""harsh, marsh, rajas, warps, ra…",True,"""harsh""",2,5,"""hard""","""loure, roule""",2,"""harsh""",1.0
"""bubba""","""loure""","""nisetolr""","""au""",8,2,0,"""BBYBB""",21,"""quack, yucca, bubba, guava, gu…",True,"""quack""",3,4,"""moderate""","""loure, roule""",2,"""quack""",1.0
"""lowes""","""loure""","""nuiatr""","""sloe""",6,4,2,"""GGBBY""",4,"""loess, lowes, losel, lomes""",True,"""loess""",4,4,"""moderate""","""loure, roule""",2,"""loess""",1.0


In [34]:
# Mean size of the remaining candidate list after the second guess.
print(
    "Avg num choices after 2nd guess: "
    f"{round(second_guess_df_2.get_column('num_choices_after_guess').mean(), 2)}"
)

Avg num choices after 2nd guess: 15.68


### Greatest num of choices left after second guess

In [35]:
# Largest number of candidates left after the second guess.
# `Series.max()` reads the maximum directly: it avoids sorting the whole frame and
# ignores nulls, whereas a descending sort places nulls first and would return one.
max_num_choices_after_2nd_guess = second_guess_df_2["num_choices_after_guess"].max()

max_num_choices_after_2nd_guess

[1;36m81[0m

In [36]:
# Distinct challenge words that still have the maximum number of candidates left
# after the second guess.
(
    second_guess_df_2
    .filter(pl.col("num_choices_after_guess") == max_num_choices_after_2nd_guess)
    .get_column("challenge")
    .unique()
    .to_numpy()
)


[1;35marray[0m[1m([0m[1m[[0m[32m'booby'[0m, [32m'foody'[0m, [32m'boody'[0m, [32m'woofy'[0m, [32m'booky'[0m, [32m'doomy'[0m, [32m'coppy'[0m,
       [32m'moppy'[0m, [32m'dodgy'[0m, [32m'gooky'[0m, [32m'boomy'[0m, [32m'doody'[0m, [32m'howff'[0m, [32m'dobby'[0m,
       [32m'howdy'[0m, [32m'poovy'[0m, [32m'poboy'[0m, [32m'podgy'[0m, [32m'cohog'[0m, [32m'cobby'[0m, [32m'goopy'[0m,
       [32m'bobby'[0m, [32m'comby'[0m, [32m'doggy'[0m, [32m'gooby'[0m, [32m'zocco'[0m, [32m'doddy'[0m, [32m'mobby'[0m,
       [32m'boozy'[0m, [32m'poddy'[0m, [32m'pommy'[0m, [32m'moggy'[0m, [32m'compo'[0m, [32m'gobby'[0m, [32m'commy'[0m,
       [32m'oomph'[0m, [32m'cocco'[0m, [32m'doggo'[0m, [32m'dohyo'[0m, [32m'woozy'[0m, [32m'comfy'[0m, [32m'zoppo'[0m,
       [32m'foggy'[0m, [32m'boogy'[0m, [32m'doozy'[0m, [32m'goody'[0m, [32m'coomy'[0m, [32m'gombo'[0m, [32m'boofy'[0m,
       [32m'pozzy'[0m, [32m'pooch'[0m

### How often on average is the challenge word in the possible choices after the second guess

In [37]:
# Percentage of second-guess rows whose candidate list contains the challenge word.
print(
    "Avg times challenge in possible guesses: "
    f"{round(second_guess_df_2.get_column('challenge_in_possible_guesses').mean() * 100, 2)}%"
)

Avg times challenge in possible guesses: 75.72%


In [38]:
# All simulation rows for the challenge word "hound"
simulating_tries_difficulty.filter(pl.col("challenge") == "hound")

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""hound""","""saint""","""atis""","""n""",4,1,1,"""BBBGB""",111,"""young, drunk, pound, blend, ro…",False,"""loure""",2248,,"""very hard""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""hound""","""loure""","""isaetlr""","""nou""",7,3,3,"""BGGBB""",6,"""young, wound, pound, mound, ho…",True,"""young""",2248,,"""very hard""","""loure, roule""",2,"""young""",1
"""hound""","""young""","""yisaetlrg""","""nou""",9,3,3,"""BGGGB""",4,"""wound, pound, mound, hound""",True,"""wound""",2248,,"""very hard""","""young""",1,"""wound""",1
"""hound""","""wound""","""wyisaetlrg""","""noud""",10,4,4,"""BGGGG""",3,"""pound, mound, hound""",True,"""pound""",2248,,"""very hard""","""wound""",1,"""pound""",1
"""hound""","""pound""","""wyispaetlrg""","""noud""",11,4,4,"""BGGGG""",2,"""mound, hound""",True,"""mound""",2248,,"""very hard""","""pound""",1,"""mound""",1
"""hound""","""mound""","""wyispaetlmrg""","""noud""",12,4,4,"""BGGGG""",1,"""hound""",True,"""hound""",2248,,"""very hard""","""mound""",1,"""hound""",1


In [39]:
# Render game 2248 as an HTML table.
# NOTE(review): 2248 is hard-coded; presumably the group id of the "hound" game
# filtered in the previous cell — verify it still matches after a re-simulation.
HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == 2248)
    .to_pandas()
    .assign(
        match_pattern=lambda game: color_pattern_matching(
            dataf=game, challenge_col="challenge", guess_col="guess"
        )
    )
    .loc[
        :,
        [
            "challenge",
            "guess",
            "match_pattern",
            "next_guess",
            "num_choices_after_guess",
            "possible_guesses",
            "difficulty",
        ],
    ]
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,hound,saint,saint,loure,111,"young, drunk, pound, blend, round, prune, wrong, bonne, penny, wound",very hard
1,hound,loure,loure,young,6,"young, wound, pound, mound, hound, nouny",very hard
2,hound,young,young,wound,4,"wound, pound, mound, hound",very hard
3,hound,wound,wound,pound,3,"pound, mound, hound",very hard
4,hound,pound,pound,mound,2,"mound, hound",very hard
5,hound,mound,mound,hound,1,hound,very hard


In [40]:
# All simulation rows for the challenge word "night"
simulating_tries_difficulty.filter(pl.col("challenge") == "night")

challenge,guess,letter_differences,common_letters,num_diff_letters,num_common_letters,num_matching_index,match_pattern,num_choices_after_guess,possible_guesses,challenge_in_possible_guesses,next_guess,group,tries,difficulty,guess_word_anagrams,anagram_num,anagrams,anagram_num_right
str,str,str,str,u8,u8,u8,str,u16,str,bool,str,u32,u8,str,str,u8,str,u8
"""night""","""saint""","""as""","""int""",2,3,1,"""BBYYG""",19,"""night, input, inert, inlet, ni…",True,"""loure""",5828,3,"""moderate""","""saint, stain, satin, antis, na…",5,"""loure, roule""",2
"""night""","""loure""","""usaeolr""","""int""",7,3,1,"""BBBBB""",6,"""night, nicht, tinct, inwit, ci…",True,"""night""",5828,3,"""moderate""","""loure, roule""",2,"""night, thing""",2
"""night""","""night""","""""","""intgh""",0,5,5,"""GGGGG""",0,"""""",False,"""night""",5828,3,"""moderate""","""night, thing""",2,"""night, thing""",2
"""night""","""night""","""""","""intgh""",0,5,5,"""GGGGG""",0,"""""",False,"""night""",5828,3,"""moderate""","""night, thing""",2,"""night, thing""",2
"""night""","""night""","""""","""intgh""",0,5,5,"""GGGGG""",0,"""""",False,"""night""",5828,3,"""moderate""","""night, thing""",2,"""night, thing""",2
"""night""","""night""","""""","""intgh""",0,5,5,"""GGGGG""",0,"""""",False,"""night""",5828,3,"""moderate""","""night, thing""",2,"""night, thing""",2


In [41]:
# Render game 5828 as an HTML table.
# NOTE(review): 5828 is hard-coded; presumably the group id of the "night" game
# filtered in the previous cell — verify it still matches after a re-simulation.
HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == 5828)
    .to_pandas()
    .assign(
        match_pattern=lambda game: color_pattern_matching(
            dataf=game, challenge_col="challenge", guess_col="guess"
        )
    )
    .loc[
        :,
        [
            "challenge",
            "guess",
            "match_pattern",
            "next_guess",
            "num_choices_after_guess",
            "possible_guesses",
            "difficulty",
        ],
    ]
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,night,saint,saint,loure,19,"night, input, inert, inlet, nicht, unlit, unfit, inept, ingot, pinot",moderate
1,night,loure,loure,night,6,"night, nicht, tinct, inwit, cinct, binit",moderate
2,night,night,night,night,0,,moderate
3,night,night,night,night,0,,moderate
4,night,night,night,night,0,,moderate
5,night,night,night,night,0,,moderate


In [45]:
# Group ids of the games whose challenge is one of the "-aunt" look-alike words.
(
    simulating_tries_difficulty
    .filter(
        pl.col("challenge").is_in(
            ["jaunt", "vaunt", "daunt", "haunt", "gaunt", "taunt"]
        )
    )
    .get_column("group")
    .unique()
)

group
u32
879
2303
3283
6634
7793
7884


In [59]:
# Render game 879 as an HTML table.
# NOTE(review): 879 is hard-coded; presumably one of the "-aunt" group ids listed
# by the previous cell — verify it still matches after a re-simulation.
HTML(
    simulating_tries_difficulty
    .filter(pl.col("group") == 879)
    .to_pandas()
    .assign(
        match_pattern=lambda game: color_pattern_matching(
            dataf=game, challenge_col="challenge", guess_col="guess"
        )
    )
    .loc[
        :,
        [
            "challenge",
            "guess",
            "match_pattern",
            "next_guess",
            "num_choices_after_guess",
            "possible_guesses",
            "difficulty",
        ],
    ]
    .to_html(escape=False)
)


Unnamed: 0,challenge,guess,match_pattern,next_guess,num_choices_after_guess,possible_guesses,difficulty
0,vaunt,saint,saint,loure,8,"haunt, gaunt, taunt, jaunt, daunt, vaunt, naunt, daynt",very hard
1,vaunt,loure,loure,haunt,7,"haunt, gaunt, taunt, jaunt, daunt, vaunt, naunt",very hard
2,vaunt,haunt,haunt,gaunt,6,"gaunt, taunt, jaunt, daunt, vaunt, naunt",very hard
3,vaunt,gaunt,gaunt,taunt,5,"taunt, jaunt, daunt, vaunt, naunt",very hard
4,vaunt,taunt,taunt,jaunt,4,"jaunt, daunt, vaunt, naunt",very hard
5,vaunt,jaunt,jaunt,daunt,3,"daunt, vaunt, naunt",very hard
