Solves the Wordle game (https://www.powerlanguage.co.uk/wordle/)


Required: download the Collins Scrabble Words dictionary from the following location:

https://drive.google.com/file/d/1oGDf1wjWp5RF_X9C7HoedhIWMh5uJs8s/view (also available on GitHub alongside this notebook)

In [1]:
from collections import Counter, defaultdict
import copy
from typing import List, Optional, Dict, Tuple


In [2]:
def dedup_scorer(word, 
                 char_scores,
                 known_greys: Optional[List[str]] = None,
                 known_yellows: Optional[Dict[str, List[int]]] = None) -> int:
    """
    Score a candidate word from per-position letter frequencies, counting each distinct letter once.

    word: Current word for which the score should be computed.
    char_scores: Dictionary keyed by letter; each value is a dictionary mapping a
        position in the word to the score of that letter at that position.
        Letters or positions that are missing count as 0. Scores are expected
        to be non-negative.
    known_greys: Letters known to be absent from the answer (Wordle response).
    known_yellows: Dictionary keyed by yellow letters; each value lists the
        positions where that letter is known NOT to be.
    Returns: the word score, computed as follows.
    Logic: the score is 0 if the word contains any letter from known_greys, is
    missing any letter from known_yellows, or has a yellow letter in one of its
    excluded positions.
    Otherwise every distinct letter contributes the score of its position in the
    word; a letter that occurs several times contributes only once, using the
    occurrence with the highest score. The contributions are summed.
    An empty word scores 0.
    """
    chars = list(word)

    # Any letter already ruled out disqualifies the word.
    if known_greys and any(char in known_greys for char in chars):
        return 0

    if known_yellows:
        for char, excluded_positions in known_yellows.items():
            # A yellow letter must appear somewhere in the answer...
            if char not in chars:
                return 0
            # ...but not at a position where it was already shown as yellow.
            for pos in excluded_positions:
                if pos < len(chars) and chars[pos] == char:
                    return 0

    # Group the positions of each distinct letter so repeats are scored once.
    positions_by_char = defaultdict(list)
    for pos, char in enumerate(chars):
        positions_by_char[char].append(pos)

    total = 0
    for char, positions in positions_by_char.items():
        # .get() avoids a KeyError on plain dicts and avoids inserting new keys
        # into a defaultdict while merely reading scores.
        position_scores = char_scores.get(char, {})
        total += max(position_scores.get(pos, 0) for pos in positions)
    return total

def score_words_by_position(words_list: List[str], 
                            char_scores: Dict[str, Dict[int, int]],
                            known_greys: List[str] = None,
                            known_yellows: Dict[str,List[int]] = None):
    """
    Build a {word: score} dictionary for every word in words_list.

    Each word is scored by dedup_scorer using the per-position letter scores in
    char_scores together with the grey/yellow constraints gathered so far. A
    letter that occurs more than once in a word is counted only once, at its
    best-scoring position.
    """
    return {
        candidate: dedup_scorer(candidate, char_scores, known_greys, known_yellows)
        for candidate in words_list
    }

def update_char_scores(char_scores_by_pos, wordle_word, wordle_response: List[int]) -> Dict[str, Dict[int, int]]:
    """
    Return a copy of the per-position letter scores adjusted for one Wordle response.

    char_scores_by_pos: Dictionary keyed by letter; each value maps a position
        to that letter's score at that position. It is not modified.
    wordle_word: The word that was guessed.
    wordle_response: One entry per letter of wordle_word:
        0 = grey, 1 = yellow, 2 = green.
    Returns: a deep copy of char_scores_by_pos where
        - a green letter gets the score 100000 at its position (a large value
          intended to outweigh ordinary frequency counts),
        - a yellow letter gets the score 0 at its position,
        - a grey letter gets the score 0 at every position of the word, unless
          the same letter is green or yellow elsewhere in this guess. In that
          case the letter is in the answer, so only the grey position is zeroed
          and a green recorded for another occurrence is not overwritten.
    """
    new_scores = copy.deepcopy(char_scores_by_pos)
    word_length = len(wordle_word)

    # Letters confirmed to be in the answer by this very guess.
    present = {
        char
        for char, mark in zip(wordle_word, wordle_response)
        if mark in (1, 2)
    }

    for position, char in enumerate(wordle_word):
        mark = wordle_response[position]
        # setdefault also covers letters that have no entry yet in a plain dict.
        position_scores = new_scores.setdefault(char, {})
        if mark == 0:
            if char in present:
                # Repeated letter: grey here only rules out this position.
                position_scores[position] = 0
            else:
                for pos in range(word_length):
                    position_scores[pos] = 0
        elif mark == 1:
            position_scores[position] = 0
        elif mark == 2:
            position_scores[position] = 100000
    return new_scores
        

def update_greys(current_greys: List[str], token: str, wordle_response: List[int]):
    """
    Return a new list of grey (absent) letters after one Wordle response.

    current_greys: Letters already known to be absent. It is not modified.
    token: The word that was guessed.
    wordle_response: One entry per letter of token:
        0 = grey, 1 = yellow, 2 = green.
    Returns: a copy of current_greys with every newly grey letter appended once,
        in the order the letters appear in token. A letter that is grey in one
        tile but green or yellow in another tile of the same guess is NOT
        added: it is in the answer, and listing it as grey would make the
        scorer reject every word containing it.
    """
    # Letters this guess proves are in the answer.
    present = {
        char
        for char, mark in zip(token, wordle_response)
        if mark in (1, 2)
    }

    updated_greys = list(current_greys)
    for pos, char in enumerate(token):
        if wordle_response[pos] != 0:
            continue
        if char in present or char in updated_greys:
            continue
        updated_greys.append(char)
    return updated_greys

def update_yellows(current_yellows: Dict[str, List[int]], token: str, wordle_response: List[int]):
    """
    Return a new yellow-letter dictionary extended with one Wordle response.

    current_yellows: Dictionary keyed by yellow letters; each value lists the
        positions where that letter is known not to be. It is not modified.
    token: The word that was guessed.
    wordle_response: One entry per letter of token; 1 marks a yellow letter.
    Returns: a deep copy of current_yellows in which every yellow letter of this
        guess has its position recorded (each position at most once).
    """
    merged = copy.deepcopy(current_yellows)
    for index, letter in enumerate(token):
        if wordle_response[index] != 1:
            continue
        excluded = merged.setdefault(letter, [])
        if index not in excluded:
            excluded.append(index)
    return merged

In [3]:
# Wordle answers are five letters long.
WORD_LENGTH = 5

# Read the dictionary file: one entry per line.
with open("/Users/dileep/Downloads/Collins Scrabble Words (2019).txt","r") as f:
    word_list = f.read().splitlines()

# Keep only the entries whose length matches the Wordle word length.
valid_words = list(filter(lambda entry: len(entry) == WORD_LENGTH, word_list))

In [4]:
# Maps each letter to a dictionary of per-position scores; a letter's inner
# dictionary is created on first access.
char_scores_by_position = defaultdict(dict)

In [5]:
# Count, for every letter, how many valid words have it at each position.
for word in valid_words:
    for position, char in enumerate(word):
        char_score_ = char_scores_by_position[char]
        # A position not seen before starts from 0.
        char_score_[position] = char_score_.get(position, 0) + 1

In [6]:
# Sanity check: number of distinct letters that occur in the valid words.
len(char_scores_by_position)

26

In [7]:
# First guess: nothing is known yet, so there are no grey or yellow letters.
known_greys = []
known_yellows = {}
word_scores_by_position = score_words_by_position(valid_words, char_scores_by_position, known_greys, known_yellows)
# guess is a (word, score) pair for the highest-scoring word; ties go to the
# word encountered first.
guess = Counter(word_scores_by_position).most_common(1)[0]
print(guess[0])

CARES


In [8]:
# Enter the Wordle response for the previous guess as a list, one entry per letter:
# Grey = 0, Yellow = 1, Green = 2.
# Example: for CARES, if C is yellow, A is green and the rest are grey, the response is [1,2,0,0,0].
response = [0,2,1,0,0]

# Fold the response into the scores and constraints, then re-rank all words.
previous_guess = guess[0]
char_scores_by_position = update_char_scores(char_scores_by_position, previous_guess, response)
known_greys = update_greys(known_greys, previous_guess, response)
known_yellows = update_yellows(known_yellows, previous_guess, response)
word_scores_by_position = score_words_by_position(valid_words, char_scores_by_position, known_greys, known_yellows)
# guess is a (word, score) pair for the highest-scoring word.
guess = Counter(word_scores_by_position).most_common(1)[0]
print(guess[0])

RAINY


Repeat the above cell until you get the correct answer.

In [9]:
# Enter the Wordle response for the previous guess as a list, one entry per letter:
# Grey = 0, Yellow = 1, Green = 2.
# Example: for CARES, if C is yellow, A is green and the rest are grey, the response is [1,2,0,0,0].
response = [1,2,0,0,0]

# Fold the response into the scores and constraints, then re-rank all words.
previous_guess = guess[0]
char_scores_by_position = update_char_scores(char_scores_by_position, previous_guess, response)
known_greys = update_greys(known_greys, previous_guess, response)
known_yellows = update_yellows(known_yellows, previous_guess, response)
word_scores_by_position = score_words_by_position(valid_words, char_scores_by_position, known_greys, known_yellows)
# guess is a (word, score) pair for the highest-scoring word.
guess = Counter(word_scores_by_position).most_common(1)[0]
print(guess[0])

FAURD


Repeat the above cell until you get the correct answer.

In [10]:
# Enter the Wordle response for the previous guess as a list, one entry per letter:
# Grey = 0, Yellow = 1, Green = 2.
# Example: for CARES, if C is yellow, A is green and the rest are grey, the response is [1,2,0,0,0].
response = [2,2,0,1,0]

# Fold the response into the scores and constraints, then re-rank all words.
previous_guess = guess[0]
char_scores_by_position = update_char_scores(char_scores_by_position, previous_guess, response)
known_greys = update_greys(known_greys, previous_guess, response)
known_yellows = update_yellows(known_yellows, previous_guess, response)
word_scores_by_position = score_words_by_position(valid_words, char_scores_by_position, known_greys, known_yellows)
# guess is a (word, score) pair for the highest-scoring word.
guess = Counter(word_scores_by_position).most_common(1)[0]
print(guess[0])

FAVOR


Repeat the above cell until you get the correct answer.