In [1]:
import numpy as np

# Read word lists, create index dicts
with open('data/guesses.txt', 'r') as f:
    guesses = np.array(f.read().split(','))
n_guesses = guesses.shape[0]
guess_ix = dict(zip(guesses, np.arange(n_guesses)))
with open('data/solutions.txt', 'r') as f:
    solutions = np.array(f.read().split(','))
n_solutions = solutions.shape[0]
solution_ix = dict(zip(solutions, np.arange(n_solutions)))

# Create feedback grid
guesses_l = guesses.view('<U1').reshape(-1, 1, 5)
solutions_l = solutions.view('<U1').reshape(1, -1, 5)
fb_grid = np.where(guesses_l == solutions_l, '2', '0')
for l in [chr(i+65) for i in range(26)]:
    ng = (guesses_l == l) & (fb_grid == '0')
    yfl = (ng & 
           (np.cumsum(ng, axis=2) 
            <= np.sum((solutions_l == l) & (fb_grid == '0'), 
                      axis=2, 
                      keepdims=True)))
    fb_grid = np.where(yfl, '1', fb_grid)
fb_grid = np.squeeze(fb_grid.view('<U5'))

In [2]:
def worthwhile_guesses(subgrid):
    """
    Filter out guesses with duplicate results or that don't reduce possibilities;
    return (1) indices for worthwhile guesses and (2) grouping flags
    """
    sg_gps = np.zeros_like(subgrid, dtype=int)
    exp_wds = np.zeros_like(subgrid[:, 0], dtype=float)
    ncols = subgrid.shape[1]
    rix = np.arange(n_guesses)
    lbl = 0
    while np.any(sg_gps == 0):
        rix = rix[np.any(sg_gps[rix] == 0, axis=1)]
        cix = np.min(np.where(sg_gps[rix] == 0, 
                              np.expand_dims(np.arange(ncols), 0), 
                              subgrid.shape[1]),
                     axis=1)
        lbl += 1
        sg_gps[rix] = np.where(subgrid[rix] == np.expand_dims(subgrid[rix, cix], 1), 
                               lbl, 
                               sg_gps[rix])
        exp_wds[rix] += (np.sum(sg_gps[rix] == lbl, axis=1) ** 2) / ncols
    
    sortix = np.lexsort([np.all(subgrid != '22222', axis=1),
                         *[sg_gps[:, i] for i in range(ncols-1, -1, -1)],
                         exp_wds])
    filt = np.any(sg_gps[sortix] != np.append(np.zeros_like(sg_gps[sortix][:1]), 
                                              sg_gps[sortix][:-1], 
                                              axis=0),
                  axis=1)
    filt2 = np.any(subgrid[sortix][filt] == '22222', axis=1)\
            | (exp_wds[sortix][filt] < (np.minimum(np.max(exp_wds[sortix][filt][:10]) * 2, ncols)))
    
    return np.arange(n_guesses)[sortix][filt][filt2], sg_gps[sortix][filt][filt2]

WORDGP_DICT = {'n_wordgps': 0}
def get_wordgp_id(wordgp, subdict):
    if len(wordgp) == 0:
        if 'ID' not in subdict:
            subdict['ID'] = WORDGP_DICT['n_wordgps']
            WORDGP_DICT['n_wordgps'] += 1
        return subdict['ID']
    else:
        if wordgp[0] not in subdict:
            subdict[wordgp[0]] = dict()
        return get_wordgp_id(wordgp[1:], subdict[wordgp[0]])

KNOWN = {}
def best_guess(ps='ALL'):
    if isinstance(ps, str) and ps == 'ALL':
        ps_id = 'ALL'
        ps = np.arange(n_solutions)
    else:
        ps_id = get_wordgp_id(ps, WORDGP_DICT)
    
    if ps_id not in KNOWN:
        try_guesses, gp_flags = worthwhile_guesses(fb_grid[:, ps])
        guess_sc = []
        for g in range(try_guesses.shape[0]):
            gp_sc = 0
            gps, gp_cts = np.unique(gp_flags[g], return_counts=True)
            for gp in range(gps.shape[0]):
                if ps_id == 'ALL':
                    print(f"\rGuess {g+1} of {try_guesses.shape[0]}, Group {gp+1} of {gps.shape[0]}{' '*20}", end='')
                if fb_grid[try_guesses[g], ps[gp_flags[g] == gps[gp]][0]] == '22222':
                    gp_sc += float(gp_cts[gp])
                else:
                    gp_sc += gp_cts[gp] * (best_guess(ps[gp_flags[g] == gps[gp]])[1] + 1)
            guess_sc += [gp_sc]
        min_ix = np.argmin(guess_sc)
        KNOWN[ps_id] = guesses[try_guesses[min_ix]], guess_sc[min_ix]/ps.shape[0]
            
    return KNOWN[ps_id]

In [None]:
# best_guess()

In [3]:
ps = np.arange(n_solutions)

In [4]:
g1 = guess_ix['ATONE']
fb1 = '10001'
ps = ps[fb_grid[g1, ps] == fb1]
print(g1)
print(fb1)
print(solutions[ps])

625
10001
['BAGEL' 'BAKER' 'BALER' 'BEACH' 'BEADY' 'BEARD' 'BEAST' 'BEGAN' 'BEGAT'
 'BLEAK' 'BLEAT' 'BREAD' 'BREAK' 'CADET' 'CAGEY' 'CAMEL' 'CAMEO' 'CAPER'
 'CEDAR' 'CHEAP' 'CHEAT' 'CLEAN' 'CLEAR' 'CLEAT' 'CREAK' 'CREAM' 'DEALT'
 'DEBAR' 'DECAL' 'DECAY' 'DELAY' 'DREAD' 'DREAM' 'EAGER' 'EASEL' 'FACET'
 'FEAST' 'FECAL' 'FELLA' 'FERAL' 'FREAK' 'GAMER' 'GAVEL' 'GAYER' 'GAZER'
 'GLEAM' 'GLEAN' 'GREAT' 'HAREM' 'HAVEN' 'HAZEL' 'HEADY' 'HEARD' 'HEART'
 'HEAVY' 'IDEAL' 'KEBAB' 'LABEL' 'LADEN' 'LAGER' 'LAPEL' 'LAYER' 'LEACH'
 'LEAFY' 'LEAKY' 'LEAPT' 'LEARN' 'LEASH' 'LEAST' 'LEGAL' 'MAKER' 'MEALY'
 'MECCA' 'MEDAL' 'MEDIA' 'NAVEL' 'OAKEN' 'OCEAN' 'OMEGA' 'OPERA' 'PALER'
 'PAPER' 'PARER' 'PAYER' 'PEACH' 'PEARL' 'PECAN' 'PEDAL' 'PLEAD' 'PLEAT'
 'RACER' 'RAMEN' 'RARER' 'RAVEN' 'REACH' 'REACT' 'READY' 'REALM' 'REARM'
 'REBAR' 'RECAP' 'REGAL' 'REHAB' 'RELAX' 'RELAY' 'REPAY' 'SAFER' 'SEDAN'
 'SEPIA' 'SHEAR' 'SMEAR' 'SPEAK' 'SPEAR' 'SWEAR' 'SWEAT' 'TAKEN' 'TAKER'
 'TAMER' 'TAPER' 'TEACH' 'TEARY' 'TERRA' 

In [None]:
best_guess(ps)

In [None]:
ps

In [None]:
g2 = guess_ix['SLURP']
fb2 = '01000'
ps = ps[fb_grid[g2, ps] == fb2]
print(g2)
print(fb2)
print(solutions[ps])

In [None]:
best_guess(ps)

In [None]:
g3 = guess_ix['LUNGE']
fb3 = '02122'
ps = ps[fb_grid[g3, ps] == fb3]
print(g3)
print(fb3)
print(solutions[ps])

In [None]:
best_guess(ps)