# WordleAssist

In [32]:
# imports
import re
from urllib import request
try:
  import wordfreq as wf
except:
  wf = None
finally:
  use_wordfreqs = wf is not None

In [15]:
# setup
url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
s = ""
words = []
with request.urlopen(url) as f:
    s = f.read()
    words = [word.decode("utf-8") for word in s.splitlines() if len(word) == 5]

In [66]:
# build a regex
def build_regex(input_pattern):
    partial_pattern, exclude_chars = input_pattern.split('|')
    exclude_chars = "".join([e for e in exclude_chars])
    correct_chars = "".join([p for p in partial_pattern if p >= 'A' and p <= 'Z'])
    pattern = "^"
    
    partial_pattern_partial_chars = [p for p in partial_pattern if p >= 'a' and p <= 'z']
    if (len(partial_pattern_partial_chars) > 0):
        partial_matches = "".join(partial_pattern_partial_chars)
        pattern += (f"(?=.*[{partial_matches}])")
        
    for c in partial_pattern:
        # this can't be an excluded character or a correct character
        if c == '_':
            pattern += f"[^{exclude_chars}{correct_chars}]"
        # correct char in the correct position
        elif c >= 'A' and c <= 'Z':
            pattern += c.lower()
        # this position cannot be the specified character or an excluded character or a correct one
        elif c >= 'a' and c <= 'z':
            pattern += f"[^{correct_chars}{exclude_chars}{c}]"
    return pattern


# test the pattern
def test(pattern, limit = 25):
    regex = build_regex(pattern)
    print(regex)
    matching_words = [w for w in words if re.match(regex, w)]
    if (use_wordfreqs):
        l = []
        for w in matching_words:
            l.append((w,wf.word_frequency(w, 'en')))
        l.sort(key = lambda f : f[1], reverse = True)
        l = l[:limit]
        for i in l:
            print(f"{i[0]}|{i[1]}")
    else:     
        for w in matching_words:
            print(w)


Test format string
- Underscores are unknown characters
- Use lower case chars for right letter + wrong position (yellow)
- Use upper case for right letter in the right position (green)

In [62]:
test("__r__|paty")

^(?=.*[r])[^paty][^paty][^patyr][^paty][^paty]
where|0.001
never|0.000813
under|0.000537
order|0.000309
hours|0.000251
wrong|0.000245
bring|0.000186
round|0.000138
drive|0.000135
green|0.000135
lower|0.000129
cover|0.00012
rules|0.000115
offer|0.000112
river|0.000107
brown|0.000102
cross|0.0001
floor|8.71e-05
older|8.32e-05
color|8.13e-05
drink|7.94e-05
crime|7.76e-05
broke|7.08e-05
fresh|6.76e-05
owner|6.76e-05


In [67]:
test("DRI__|vepaty")

^dri[^vepatyDRI][^vepatyDRI]
drink|7.94e-05
drill|1.23e-05
dribs|3.47e-08
drinn|0.0
drisk|0.0
