In [4]:
import pandas as pd

from src.wordle import WordleSolver

In [5]:
# Accumulators filled while the frequency list is parsed:
#   five_char_irish_words - one dict per five-letter word
#   unique_letters        - dict used as an insertion-ordered set of letters
five_char_irish_words = []
unique_letters = {}

def add_unq_letters(letter_list):
    """Register every letter of `letter_list`, lowercased, as a key of `unique_letters`.

    Letters already present keep their original insertion position; the stored
    value is always True.
    """
    unique_letters.update((ch.lower(), True) for ch in letter_list)

# Parse the tab-separated frequency list, keeping only the five-letter words.
# The encoding is passed explicitly because the words contain accented letters
# (é, á, ó, ú, í) and the platform default codec is not guaranteed to decode
# them (assumes the asset is UTF-8 encoded — TODO confirm).
with open('./assets/irish-word-frequency.txt', 'r', encoding='utf-8') as file:
    for line in file:
        # only lines that begin with a digit are treated as data rows
        if not line[0].isdigit():
            continue
        # fields: leading number (unused), word, frequency, then anything else;
        # the newline is stripped first so it can never end up inside `freq`
        _, word, freq, *_ = line.rstrip('\n').split('\t')
        if len(word) != 5:
            continue
        # lowercase once, so the positional columns agree with the stored word
        word = word.lower()
        letters = list(word)
        entry = {'word': word, 'freq': freq}
        # add the positions to the dictionary under the keys '1'..'5'
        for position, letter in enumerate(letters[:5], start=1):
            entry[str(position)] = letter
        five_char_irish_words.append(entry)
        # add unique values to the unique letters dictionary
        add_unq_letters(letters)
                
# For every word, add one boolean entry per known letter saying whether the
# word contains that letter.
for entry in five_char_irish_words:
    entry.update({ch: ch in entry['word'] for ch in unique_letters})

In [6]:
# Build the working table: one row per collected five-letter word dictionary.
db = pd.DataFrame(five_char_irish_words)

In [7]:
# Preview the first rows: word, raw frequency, the five positional letters,
# then one boolean column per letter.
db.head()

Unnamed: 0,word,freq,1,2,3,4,5,d,u,i,...,c,o,s,l,ó,p,ú,á,í,v
0,duine,139569,d,u,i,n,e,True,True,True,...,False,False,False,False,False,False,False,False,False,False
1,abair,94140,a,b,a,i,r,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,maith,87286,m,a,i,t,h,False,False,True,...,False,False,False,False,False,False,False,False,False,False
3,faigh,86973,f,a,i,g,h,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,téigh,74714,t,é,i,g,h,False,False,True,...,False,False,False,False,False,False,False,False,False,False


### Since the frequency values are raw counts, let's normalise them so they sum to 1

In [8]:
# Cast the raw counts to float once, then divide each by the overall total.
freq_counts = db['freq'].astype(float)
db['freq'] = freq_counts / freq_counts.sum()

In [9]:
# this gives the relative likelihood of each letter: the number of words that
# contain the letter, divided by the total of those counts over all letters
# we'll create a score based on the sum of the letter likelihoods
#
# The per-letter flags are picked by dtype rather than by position
# (`db.columns[7:]`), so re-running this cell after the float `score` column
# has been added to `db` still yields the same result.
letter_flags = db.select_dtypes(include='bool')
letter_counts = letter_flags.sum()
letter_likelihood = letter_counts / letter_counts.sum()
letter_likelihood

d    0.030769
u    0.023776
i    0.092541
n    0.050816
e    0.050583
a    0.123776
b    0.027972
r    0.073427
m    0.028904
t    0.055944
h    0.043124
f    0.018182
g    0.034266
é    0.016084
c    0.065967
o    0.051981
s    0.063636
l    0.055012
ó    0.011655
p    0.015851
ú    0.019114
á    0.027040
í    0.018881
v    0.000699
dtype: float64

Let's add a simple score column: for each word, the sum of the likelihoods of the distinct letters it contains

In [10]:
# summing up all of the values for the cases where we have a matching letter is the same thing
# as taking the dot product, and pandas has a handy function out of the box for that!
#
# The letter columns are selected through `letter_likelihood.index` instead of
# `db.columns[7:]`: once `score` exists it would be part of that slice, and
# re-running the cell would fail because the operands no longer align.
db['score'] = db[letter_likelihood.index].dot(letter_likelihood)
db['score']

0      0.248485
1      0.317716
2      0.344289
3      0.311888
4      0.241958
         ...   
908    0.303963
909    0.359907
910    0.267133
911    0.218881
912    0.320513
Name: score, Length: 913, dtype: float64

In [11]:
# Persist the scored table as parquet; index=False leaves the row index out of the file.
db.to_parquet('./assets/small_irish_df.parquet', index=False)

In [12]:
# Create the solver from the scored word table.
# NOTE(review): WordleSolver lives in src.wordle and is not visible here — which
# columns of `db` it relies on should be confirmed against that module.
g = WordleSolver(db)

In [13]:
# Round 1 against the hard-coded answer 'báire': get a guess from the solver,
# evaluate it against the answer, feed the clues back, then show what is left.
# NOTE(review): the per-letter lines in the output (e.g. "i g") presumably come
# from the solver, with g/r marking a hit/miss — confirm in src.wordle.
guess = g.make_guess(); print(f'Guess: {guess}')
clues = g.evaluate_guess(guess, 'báire')
g.update_clues_and_guess(clues, guess)
g.list_possible_guesses()

Guess: toill
t r
o r
i g
l r
l r


0      duine
3      faigh
21     dóigh
33     imigh
60     suigh
       ...  
876    múisc
878    buirg
888    céide
900    caise
904    muine
Name: word, Length: 73, dtype: object

In [14]:
# Round 2: same steps as before, on the same solver object `g`, so the state
# it accumulated from the previous round is presumably reused — confirm in src.wordle.
guess = g.make_guess(); print(guess)
clues = g.evaluate_guess(guess, 'báire')
g.update_clues_and_guess(clues, guess)
g.list_possible_guesses()

scine
s r
c r
i g
n r
e g


63     máire
164    gáire
175    muire
251    báire
256    maide
291    péire
412    faire
652    faide
691    géire
Name: word, dtype: object

In [15]:
# Round 3: repeat the guess / evaluate / update cycle once more against 'báire'
# and list the candidates that remain.
guess = g.make_guess(); print(guess)
clues = g.evaluate_guess(guess, 'báire')
g.update_clues_and_guess(clues, guess)
g.list_possible_guesses()

báire
b g
á g
i g
r g
e g


251    báire
Name: word, dtype: object