In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time

In [2]:
# import numpy as np
# words = np.loadtxt('CSW.txt', dtype='str')
# five_letter_words = np.array([word for word in words if len(word)==5])
# np.savetxt('valid_words_2.txt',five_letter_words,fmt='%s')

In [2]:
class wordle_testing():
    """
    Holds a hidden word and scores guesses against it.

    Attributes
    ----------
    valid_list : numpy.ndarray
        Words loaded from 'valid_guesses.txt' (not referenced by ``check``).
    true : str
        The hidden word every guess is compared with.
    """
    def __init__(self, true):
        self.valid_list = np.loadtxt('valid_guesses.txt', dtype='str')
        self.true = true

    def check(self, guess):
        """
        Gives wordle output for ``guess`` scored against ``self.true``.

        Parameters
        ----------
        guess : str
            guessed word to be tested; must have the same length as
            ``self.true``

        Returns
        -------
        str
            One character per letter of ``guess``: 'G' (green, right letter in
            the right place), 'Y' (yellow, letter occurs elsewhere in the true
            word and that occurrence is not already accounted for) or
            'B' (black, everything else).

        Raises
        ------
        ValueError
            If ``guess`` and ``self.true`` have different lengths.
        """
        true = self.true
        if len(guess) != len(true):
            raise ValueError('guess and true word must have the same length')

        output = ['B'] * len(guess)
        # Letters of the true word that were NOT matched by a green; only
        # these can still earn a yellow.
        unmatched = {}
        for i, (guess_letter, true_letter) in enumerate(zip(guess, true)):
            # First pass to decide which letters are green
            if guess_letter == true_letter:
                output[i] = 'G'
            else:
                unmatched[true_letter] = unmatched.get(true_letter, 0) + 1
        for i, guess_letter in enumerate(guess):
            # Second pass to decide which letters are yellow; each yellow
            # consumes one unmatched occurrence so repeated letters in the
            # guess are not over-counted.
            if output[i] != 'G' and unmatched.get(guess_letter, 0) > 0:
                unmatched[guess_letter] -= 1
                output[i] = 'Y'
        return ''.join(output)

    
    
    

In [3]:
def check(guess, true):
    """
    Gives wordle output based on guess

    Parameters
    ----------
    guess : str
        guessed word to be tested
    true  : str
        true word to be checked against; must have the same length as
        ``guess``

    Returns
    -------
    str
        One character per letter of ``guess``: 'G' (green, right letter in the
        right place), 'Y' (yellow, letter occurs elsewhere in the true word
        and that occurrence is not already accounted for) or 'B' (black,
        everything else). For the usual 5-letter words this is a 5 character
        string.

    Raises
    ------
    ValueError
        If ``guess`` and ``true`` have different lengths.
    """
    if len(guess) != len(true):
        raise ValueError('guess and true word must have the same length')

    output = ['B'] * len(guess)
    # Letters of the true word that were NOT matched by a green; only these
    # can still earn a yellow.
    unmatched = {}
    for i, (guess_letter, true_letter) in enumerate(zip(guess, true)):
        # First pass to decide which letters are green
        if guess_letter == true_letter:
            output[i] = 'G'
        else:
            unmatched[true_letter] = unmatched.get(true_letter, 0) + 1
    for i, guess_letter in enumerate(guess):
        # Second pass to decide which letters are yellow; each yellow consumes
        # one unmatched occurrence so repeated letters in the guess are not
        # over-counted.
        if output[i] != 'G' and unmatched.get(guess_letter, 0) > 0:
            unmatched[guess_letter] -= 1
            output[i] = 'Y'
    return ''.join(output)


In [4]:
class wordle_solver():
    """
    Narrows down a list of candidate wordles from guess/feedback pairs and
    suggests the next guess.

    Attributes
    ----------
    valid_words : numpy.ndarray
        Words loaded from 'valid_guesses.txt'.
    wordles : numpy.ndarray
        Current list of possible wordles given the guesses and outputs seen so
        far; starts out as ``valid_words``.
    all_wordles : numpy.ndarray
        Words loaded from 'wordles.txt'.
    """
    def __init__(self):
        self.valid_words = np.loadtxt('valid_guesses.txt', dtype='str')
        self.wordles = self.valid_words # a current list of possible wordles given guesses and outputs
        self.all_wordles = np.loadtxt('wordles.txt', dtype='str')

    def update_possible_wordles(self, guess, output):
        """
        Keep only the candidates that would have produced ``output`` for ``guess``.

        Parameters
        ----------
        guess : str
            the word that was guessed
        output : str
            the feedback received for that guess (string of B, Y, G)

        Replaces ``self.wordles`` with the surviving candidates and prints
        them together with their count.
        """
        survivors = [str(word) for word in self.wordles if check(guess, word) == output]
        # dtype=object keeps plain Python strings, as the previous
        # DataFrame-based filter did.
        self.wordles = np.array(survivors, dtype=object)
        print(self.wordles)
        print('number of possible wordles:',len(self.wordles))

    def calculate_std(self):
        """
        Rank every current candidate by how evenly it splits the candidates.

        For each candidate used as a guess, the feedback against every
        candidate is tallied over all patterns listed in 'combinations.txt'
        (patterns that never occur count as 0) and the sample standard
        deviation of those tallies is taken. A lower value means the guess
        spreads the candidates more evenly over the feedback patterns.

        Prints the full ranking and returns the word with the lowest standard
        deviation. With an empty candidate list the final ``.loc[0, 'words']``
        lookup raises KeyError.
        """
        # The pattern list does not depend on the guess, so read it once.
        # (`squeeze=True` was dropped from the call: pandas 2.0 removed it.)
        patterns = pd.read_csv('combinations.txt', names=['output'], index_col=0).index

        std = []
        for word1 in self.wordles:
            feedback = pd.Series([check(word1, word2) for word2 in self.wordles], dtype=object)
            counts = feedback.value_counts().reindex(patterns, fill_value=0)
            std.append(counts.std())

        standard_dev = pd.DataFrame(data={'words':self.wordles,'std':std}).sort_values('std', ignore_index=True)
        best_word = standard_dev.loc[0,'words']
        print(standard_dev)
        return best_word
        
        

In [5]:
ws = wordle_solver()

In [6]:
wordle = 'ROATE'

### Guess 1: 

In [7]:
guess1 = 'ROATE'
# output = check(guess1, wordle)
output = 'BBBYB' 

In [8]:
ws.update_possible_wordles(guess1, output)

['BIGHT' 'BINIT' 'BITCH' 'BITSY' 'BLIST' 'BLUNT' 'BUILT' 'BUIST' 'BUNDT'
 'BUTCH' 'BUTUT' 'BUTYL' 'CINCT' 'CLIFT' 'CLINT' 'CLIPT' 'CUBIT' 'CUNIT'
 'CUTCH' 'CUTIN' 'CUTIS' 'CUTUP' 'CWTCH' 'DICHT' 'DIDST' 'DIGHT' 'DIGIT'
 'DITCH' 'DITSY' 'DITZY' 'DIXIT' 'DUTCH' 'FIGHT' 'FITCH' 'FITLY' 'FIXIT'
 'FLINT' 'FLUYT' 'GITCH' 'GIUST' 'GLIFT' 'GLINT' 'GUILT' 'GUTSY' 'HIGHT'
 'HITCH' 'HUTCH' 'ICTIC' 'ICTUS' 'IMMIT' 'INCUT' 'INNIT' 'INPUT' 'INTIL'
 'INTIS' 'INUST' 'INWIT' 'ITCHY' 'KIGHT' 'KITHS' 'KITUL' 'KUTCH' 'KUTIS'
 'KUTUS' 'KYDST' 'LICHT' 'LICIT' 'LIGHT' 'LIMIT' 'LITHS' 'LYTIC' 'MICHT'
 'MIDST' 'MIGHT' 'MITCH' 'MITIS' 'MUIST' 'MULCT' 'MUSIT' 'MUTCH' 'MUTIS'
 'MYTHI' 'MYTHS' 'MYTHY' 'NICHT' 'NIGHT' 'NITID' 'NUTSY' 'PHPHT' 'PIGHT'
 'PIPIT' 'PITCH' 'PITHS' 'PITHY' 'PUTID' 'QUBIT' 'QUILT' 'QUINT' 'QUIST'
 'SCUFT' 'SHIFT' 'SHIST' 'SHTIK' 'SHTUM' 'SHTUP' 'SHUNT' 'SICHT' 'SIGHT'
 'SITUP' 'SITUS' 'SKINT' 'SLIPT' 'SLUIT' 'SNIFT' 'SPILT' 'SPLIT' 'SQUIT'
 'STICH' 'STICK' 'STIFF' 'STILB' 'STILL' 'STILT' 'S

### Guess 2: 

In [9]:
guess2 = ws.calculate_std()
# output = check(guess2, wordle)
# output

     words       std
0    SHUNT  2.672056
1    SICHT  2.708917
2    THINS  2.839983
3    SUINT  2.873254
4    TITIS  2.918912
..     ...       ...
232  THYMY  6.195123
233  MYTHY  6.371376
234  STYMY  6.563061
235  PHPHT  6.784112
236  CWTCH  7.223084

[237 rows x 2 columns]


In [10]:
output = 'BYBBG'

In [11]:
ws.update_possible_wordles(guess2, output)

['BIGHT' 'DICHT' 'DIGHT' 'FIGHT' 'HIGHT' 'KIGHT' 'LICHT' 'LIGHT' 'MICHT'
 'MIGHT' 'PIGHT' 'TIGHT' 'WIGHT']
number of possible wordles: 13


### Guess 3: 

In [12]:
guess3 = ws.calculate_std()
# output = check(guess3, wordle)
# output

    words       std
0   DICHT  0.597185
1   DIGHT  0.597185
2   LICHT  0.597185
3   LIGHT  0.597185
4   MICHT  0.597185
5   MIGHT  0.597185
6   BIGHT  0.610868
7   FIGHT  0.610868
8   HIGHT  0.610868
9   KIGHT  0.610868
10  PIGHT  0.610868
11  TIGHT  0.610868
12  WIGHT  0.610868


In [13]:
# NOTE(review): `output` still holds the feedback string assigned for guess 2;
# no new feedback was entered for guess3 (the check() call above is commented
# out), which is presumably why no candidates survive below.
ws.update_possible_wordles(guess3, output)

[]
number of possible wordles: 0


In [12]:
def calculate_std(wordles, valid=None):
    """
    Score guesses by how evenly they split a set of possible wordles.

    Parameters
    ----------
    wordles : sequence of str
        words the hidden wordle could still be
    valid : sequence of str, optional
        words to score as guesses; defaults to ``wordles`` itself

    Returns
    -------
    pandas.DataFrame
        One row per word in ``valid`` (same order) with columns 'words' and
        'std'. 'std' is the sample standard deviation of the number of
        ``wordles`` falling into each feedback pattern listed in
        'combinations.txt' (patterns that never occur count as 0).
    """
    if valid is None:
        valid = wordles

    # The pattern list does not depend on the guess, so read it once.
    # (`squeeze=True` was dropped from the call: pandas 2.0 removed it.)
    patterns = pd.read_csv('combinations.txt', names=['output'], index_col=0).index

    std = []
    for word1 in valid:
        feedback = pd.Series([check(word1, word2) for word2 in wordles], dtype=object)
        counts = feedback.value_counts().reindex(patterns, fill_value=0)
        std.append(counts.std())

    return pd.DataFrame(data={'words':valid,'std':std})

In [13]:
# Score every word in ws.valid_words as a guess against the full ws.all_wordles
# list. calculate_std calls check() once per (guess, wordle) pair, so this
# cell's cost grows with len(valid_words) * len(all_wordles).
a = calculate_std(ws.all_wordles, ws.valid_words)

In [15]:
a = a.sort_values('std')

In [17]:
a.to_csv('std_reduced.csv',index=False)

In [172]:
def combinations(word, subset):
    """
    Count how many words in ``subset`` give each feedback pattern for ``word``.

    Parameters
    ----------
    word : str
        the guess
    subset : sequence of str
        the words the guess is scored against

    Returns
    -------
    pandas.DataFrame
        Indexed by the feedback patterns listed in 'combinations.txt', with a
        single column 'counts'; patterns that never occur have a count of 0.
    """
    feedback = pd.Series([check(word, word2) for word2 in subset], dtype=object)
    observed = feedback.value_counts()
    # `squeeze=True` was dropped from the call: pandas 2.0 removed it. The
    # only column becomes the index, so this starts out as a frame with no
    # columns.
    table = pd.read_csv('combinations.txt', names=['output'], index_col=0)
    table['counts'] = 0
    table.loc[observed.index, 'counts'] = observed.values
    return table

In [184]:
# NOTE(review): `df` is assigned in a cell that appears further down in this
# notebook (the find_possible_wordles('ROCKS', 'GBBYB') cell), so this cell
# only runs after that one. It also passes a single argument, while the
# calculate_std defined above is written with two parameters (wordles, valid)
# -- TODO confirm which version of the function produced the output below.
calculate_std(df)

Unnamed: 0,words,std
0,ROCKS,1.449908
1,ROLFS,1.376816
2,ROLLS,1.464089
3,ROMPS,1.340317
4,RONTS,1.04593
5,ROODS,1.217538
6,ROOFS,1.251017
7,ROOKS,1.169058
8,ROOMS,1.21414
9,ROONS,1.147654


In [174]:
df = find_possible_wordles('ROCKS', 'GBBYB')

In [175]:
df

array(['ROCKS', 'ROLFS', 'ROLLS', 'ROMPS', 'RONTS', 'ROODS', 'ROOFS',
       'ROOKS', 'ROOMS', 'ROONS', 'ROOPS', 'ROOTS', 'RORTS', 'ROSTS',
       'ROTLS', 'ROTOS', 'ROULS', 'ROUMS', 'ROUPS', 'ROUTS', 'ROWTS',
       'RUBUS', 'RUCKS', 'RUDDS', 'RUFFS', 'RUKHS', 'RUMPS', 'RUNDS',
       'RUNGS', 'RUNTS', 'RURPS', 'RURUS', 'RUSHY', 'RUSKS', 'RUSTS',
       'RUSTY', 'RUTHS', 'RYNDS', 'RYOTS'], dtype=object)

In [168]:
comb = combinations('RAISE', df)

In [161]:
def find_possible_wordles(guess, output):
    """
    Return the words of the global ``valid`` list that match a feedback pattern.

    Parameters
    ----------
    guess : str
        the word that was guessed
    output : str
        the feedback received for that guess (string of B, Y, G)

    Returns
    -------
    numpy.ndarray
        The words in ``valid`` for which ``check(guess, word)`` equals
        ``output``.
    """
    # Feedback the guess would get against every word of the global list
    feedback = np.array([check(guess, candidate) for candidate in valid], dtype='<U5')
    table = pd.DataFrame(data={'word':valid,'output':feedback}, dtype='<U5')
    table = table.set_index('word')
    matches = table['output'] == output
    return table.loc[matches].index.values

In [134]:
abs(comb - len(valid)/243).sum()

counts    19608.740741
dtype: float64

In [402]:
comb.mean() #mean is the same for all words, so variance should be a fine measure

counts    0.160494
dtype: float64

In [136]:
len(valid)/243

53.382716049382715

In [56]:
# NOTE(review): `std` is not assigned at top level in any cell visible in this
# notebook (it is only a local inside the calculate_std functions) -- this cell
# presumably relies on leftover kernel state; TODO confirm where it came from.
stds = np.array(std).flatten()

In [57]:
len(stds)

12972

In [72]:
pd.DataFrame(data={'words':valid,'std':stds}).to_csv('std.csv', index=False)

In [432]:
# `valid` is the module-level word list that find_possible_wordles() and
# several cells above read as a global, so this cell has to run before them.
valid = np.loadtxt('valid_words_2.txt', dtype='str')
print('number of five letter words in dataset:',len(valid))
# Scores written earlier by the cell that saves 'std.csv' (columns: words, std)
stds = pd.read_csv('std.csv')

number of five letter words in dataset: 12972


In [387]:
(combinations('LARES',valid)>0).sum()

counts    192
dtype: int64

In [393]:
(combinations('TARES',valid)>0).sum()

counts    212
dtype: int64

In [381]:
stds.sort_values('std').head(20)

Unnamed: 0,words,std
6096,LARES,111.159282
8872,RALES,112.045582
11115,TARES,112.247281
7350,NARES,113.986323
8926,RATES,115.097038
11249,TERAS,115.93275
3261,EARLS,116.34356
11448,TOEAS,116.67638
6043,LAERS,117.057015
8967,REAIS,117.07297


In [137]:
# we need to make a 243x243? table 

In [509]:
# # Finding combinations
# from itertools import product
# a = product(list('GYB'), repeat=5)
# c = np.array([''.join(b) for b in a], '<U5')
# np.savetxt('combinations.txt', c, fmt='%s')