<a href="https://colab.research.google.com/github/bobsport33/Wordle-Analysis/blob/main/Wordle_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import string
from collections import Counter
import random
from bs4 import BeautifulSoup
import requests


In [5]:
# 1. Load the word list: one entry per line of dictionary.txt

with open('dictionary.txt') as file:
    # Iterating the handle yields one line at a time; rstrip drops the
    # trailing newline (and any other trailing whitespace) from each entry.
    dictionary = list(map(str.rstrip, file))


['a', 'aa', 'aaa', 'aah', 'aahed']


In [6]:
# 2. Keep only the entries of the word list that are exactly five characters long

five_letter_words = [entry for entry in dictionary if len(entry) == 5]


['aahed', 'aalii', 'aargh', 'aaron', 'abaca']


In [8]:
# 3. Count, for every letter, how many five-letter words contain it

alphabet_string = string.ascii_lowercase
alphabet = [*alphabet_string]

# Each word contributes its DISTINCT letters only (in order of first
# appearance), so a letter repeated inside one word is counted once for
# that word, not once per occurrence.
letters = [
    letter
    for word in five_letter_words
    for letter in dict.fromkeys(word)
]

letter_counter = Counter(letters)

print(letter_counter)

Counter({'a': 7247, 'e': 6728, 's': 5871, 'r': 4864, 'i': 4767, 'o': 4613, 'l': 3923, 't': 3866, 'n': 3773, 'u': 3241, 'd': 2639, 'c': 2588, 'y': 2476, 'm': 2361, 'h': 2223, 'p': 2148, 'b': 1936, 'g': 1867, 'k': 1663, 'w': 1160, 'f': 1115, 'v': 853, 'z': 435, 'j': 372, 'x': 357, 'q': 139})


In [9]:
# 4. Convert the per-letter counts into percentages of all letter slots

# Denominator: five letter slots for every five-letter word.
total_letters = 5 * len(five_letter_words)

# NOTE: letter_counter counts a letter at most once per word, while the
# denominator counts every slot, so these values need not sum to 100.
letters_by_percentage = {
    letter: (letter_counter[letter] / total_letters) * 100
    for letter in alphabet
}

print(letters_by_percentage)

{'a': 9.10541525317251, 'b': 2.432466390250031, 'c': 3.2516647820077895, 'd': 3.3157431838170623, 'e': 8.453323281819323, 'f': 1.40092976504586, 'g': 2.345772081919839, 'h': 2.793064455333585, 'i': 5.989445910290237, 'j': 0.4673954014323407, 'k': 2.089458474682749, 'l': 4.929011182309336, 'm': 2.9664530719939695, 'n': 4.740545294635004, 'o': 5.7959542656112575, 'p': 2.6988315114964188, 'q': 0.17464505591154666, 'r': 6.111320517652972, 's': 7.376554843573313, 't': 4.85739414499309, 'u': 4.072119613016711, 'v': 1.071742681241362, 'w': 1.4574695313481594, 'x': 0.4485488126649077, 'y': 3.1109435858776227, 'z': 0.5465510742555597}


In [11]:
# 5. Score every five-letter word by its letters and keep the best guesses

best_words = []

for candidate in five_letter_words:
    # A word's score is the sum of the percentages of its distinct letters:
    # a repeated letter adds nothing the second time it appears.
    # dict.fromkeys keeps the letters in order of first appearance, so the
    # floating-point additions happen in the same left-to-right order.
    score = 0
    for ch in dict.fromkeys(candidate):
        score += letters_by_percentage[ch]
    best_words.append((candidate, score))

# Ascending by score (stable for ties), so the strongest guesses sit at the end.
best_words = sorted(best_words, key=lambda scored: scored[1])
top_guesses = best_words[-70:]

print(top_guesses)

[('salet', 34.72169870586757), ('slate', 34.72169870586757), ('stale', 34.72169870586757), ('stela', 34.72169870586757), ('taels', 34.72169870586757), ('tales', 34.72169870586757), ('teals', 34.72169870586757), ('tesla', 34.72169870586757), ('astel', 34.721698705867574), ('setal', 34.721698705867574), ('steal', 34.721698705867574), ('aures', 35.11873350923483), ('serau', 35.11873350923483), ('urase', 35.11873350923483), ('ureas', 35.11873350923483), ('ursae', 35.11873350923483), ('aeons', 35.471792938811404), ('aotes', 35.58864178916949), ('stoae', 35.58864178916949), ('aloes', 35.66025882648574), ('alose', 35.66025882648574), ('osela', 35.66025882648574), ('solea', 35.66025882648574), ('anise', 35.66528458349039), ('insea', 35.66528458349039), ('siena', 35.66528458349039), ('sinae', 35.66528458349039), ('saite', 35.78213343384847), ('taise', 35.78213343384847), ('earns', 35.78715919085312), ('nares', 35.78715919085312), ('nears', 35.78715919085312), ('anser', 35.787159190853124), ('ra

In [12]:
# 6. Scrape the list of past Wordle answers and compute their letter usage

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as answer data.
response = requests.get("https://tryhardguides.com/wordle-answers/", timeout=30)
response.raise_for_status()
website = response.text

soup = BeautifulSoup(website, "html.parser")

# Every <strong> element nested inside a list item is treated as a candidate
# answer. NOTE(review): this selector is tied to the page's markup; re-check
# it if the scraped list looks wrong.
a_tags = soup.select(selector="ul li strong")

# The selector can also match bold text that is not an answer, and the
# percentage below assumes five letters per entry, so keep only entries that
# consist of exactly five lowercase a-z letters after trimming whitespace.
valid_letters = set(alphabet)
recent_wordle = []
for tag in a_tags:
    text = tag.getText().strip().lower()
    if len(text) == 5 and valid_letters.issuperset(text):
        recent_wordle.append(text)

# Fail with a clear message instead of a ZeroDivisionError further down.
if not recent_wordle:
    raise ValueError(
        "No five-letter answers were scraped; the page layout may have changed."
    )

# Denominator: five letter slots for every scraped answer.
recent_total_letters = len(recent_wordle) * 5

# Extending a list with a Counter adds only its keys, so each answer
# contributes its distinct letters once (repeats within a word are ignored).
recent_wordle_letters = []
for word in recent_wordle:
    recent_wordle_letters += Counter(word)

recent_letter_counter = Counter(recent_wordle_letters)

# Letters that never appear get 0.0 because a Counter returns 0 for missing keys.
recent_letter_percentage = {}
for letter in alphabet:
    percentage = (recent_letter_counter[letter] / recent_total_letters) * 100
    recent_letter_percentage[letter] = percentage

print(recent_letter_percentage)

{'a': 8.148148148148149, 'b': 1.6666666666666667, 'c': 4.074074074074074, 'd': 2.2222222222222223, 'e': 7.777777777777778, 'f': 1.4814814814814816, 'g': 1.8518518518518516, 'h': 4.444444444444445, 'i': 4.2592592592592595, 'j': 0.0, 'k': 2.5925925925925926, 'l': 5.185185185185185, 'm': 2.5925925925925926, 'n': 4.444444444444445, 'o': 7.962962962962964, 'p': 3.5185185185185186, 'q': 0.3703703703703704, 'r': 8.333333333333332, 's': 6.111111111111111, 't': 7.037037037037037, 'u': 4.2592592592592595, 'v': 0.9259259259259258, 'w': 2.4074074074074074, 'x': 0.3703703703703704, 'y': 2.7777777777777777, 'z': 0.0}


In [13]:
# 7. Re-score the dictionary using the letter percentages of recent Wordle answers

best_wordle_words = []

for candidate in five_letter_words:
    # Sum the recent-answer percentage of each distinct letter in the word;
    # a letter that repeats is only counted at its first appearance.
    # dict.fromkeys preserves first-appearance order, keeping the additions
    # in the same left-to-right order.
    score = 0
    for ch in dict.fromkeys(candidate):
        score += recent_letter_percentage[ch]
    best_wordle_words.append((candidate, score))

# Ascending by score (stable for ties), so the strongest guesses sit at the end.
best_wordle_words = sorted(best_wordle_words, key=lambda scored: scored[1])
top_wordle_guesses = best_wordle_words[-70:]

# Show the dictionary-based ranking next to the recent-answer ranking.
print(top_guesses)
print(top_wordle_guesses)

[('salet', 34.72169870586757), ('slate', 34.72169870586757), ('stale', 34.72169870586757), ('stela', 34.72169870586757), ('taels', 34.72169870586757), ('tales', 34.72169870586757), ('teals', 34.72169870586757), ('tesla', 34.72169870586757), ('astel', 34.721698705867574), ('setal', 34.721698705867574), ('steal', 34.721698705867574), ('aures', 35.11873350923483), ('serau', 35.11873350923483), ('urase', 35.11873350923483), ('ureas', 35.11873350923483), ('ursae', 35.11873350923483), ('aeons', 35.471792938811404), ('aotes', 35.58864178916949), ('stoae', 35.58864178916949), ('aloes', 35.66025882648574), ('alose', 35.66025882648574), ('osela', 35.66025882648574), ('solea', 35.66025882648574), ('anise', 35.66528458349039), ('insea', 35.66528458349039), ('siena', 35.66528458349039), ('sinae', 35.66528458349039), ('saite', 35.78213343384847), ('taise', 35.78213343384847), ('earns', 35.78715919085312), ('nares', 35.78715919085312), ('nears', 35.78715919085312), ('anser', 35.787159190853124), ('ra