<a href="https://colab.research.google.com/github/inyoot/PythonWordleSolver/blob/main/Wordle2nd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Wordle solver.

Loads English words and their frequencies from a Kaggle dataset, keeping only
the five-letter words as the file is read row by row.
The input is the guessed word and its color-coded feedback from Wordle.
The output is a list of suggested words to guess next.
You can call solve() repeatedly until the word is guessed.
"""

import csv

def csv_to_dict(file_path):
    """Read a word-frequency CSV and return its five-letter words with counts.

    The file must start with a header row that names a ``word`` column and a
    ``count`` column.

    Args:
      file_path: The path to the CSV file.

    Returns:
      A dictionary where the key is a five-letter word in lower case and the
      value is its count as an ``int``. Keys keep the order in which the words
      first appear in the file. If the same word appears more than once after
      lower-casing, its counts are added together.
      Returns an empty dictionary (after printing the error) if the file
      cannot be read or a row is malformed, e.g. a missing column or a
      non-integer count on a five-letter word.
    """
    data = {}
    try:
        with open(file_path, 'r', newline='', encoding='utf-8') as file:
            for row in csv.DictReader(file):
                # Normalize case so the "lower case" promise holds even if the
                # source file contains capitalized entries.
                word = row['word'].lower()
                if len(word) == 5:
                    # Store a real number, not the raw CSV text, so counts can
                    # be compared and sorted numerically by callers.
                    data[word] = data.get(word, 0) + int(row['count'])
    except (OSError, csv.Error, KeyError, ValueError,
            TypeError, AttributeError) as e:
        # OSError: missing/unreadable file. KeyError: missing column.
        # ValueError: bad encoding or non-integer count.
        # TypeError/AttributeError: short row (DictReader fills with None).
        print(f"An error occurred: {e}")
        return {}
    return data

# Word-frequency data source:
# https://www.kaggle.com/datasets/rtatman/english-word-frequency?resource=download
file_path = '/content/drive/MyDrive/data/unigram_freq.csv'
result_dict = csv_to_dict(file_path)
# Iterating a dict yields its keys in insertion order, i.e. in file order.
# NOTE(review): the "most frequent" label below assumes the CSV is sorted by
# descending count - confirm against the dataset.
filtered_words = list(result_dict)
print("The total five letter words: ", len(filtered_words))
print("The top ten most frequent five letter word", filtered_words[:10])

# Generate the initial guess candidates: words that have no repeated letter and
# at least three vowels

# Helper used to drop words that repeat a letter
def has_redundant_letters(text: str) -> bool:
    """Return True when at least one character occurs more than once in text."""
    distinct_chars = set(text)
    return len(distinct_chars) < len(text)

# Keep only the words whose letters are all different.
initial_words = [candidate for candidate in filtered_words
                 if not has_redundant_letters(candidate)]

# Among the first 100 of those, keep the words with at least 3 distinct
# vowels ('y' is counted as a vowel here).
initial_words = [candidate for candidate in initial_words[:100]
                 if len(set('aeiouy') & set(candidate)) >= 3]

print("The top ten initial guess words: ", initial_words[:10])


def solve(guess_word, guess_feedback, filtered_words):
    """Narrow the candidate words using the feedback for one Wordle guess.

    Args:
        guess_word: The word guessed by the player (case-insensitive).
        guess_feedback: The feedback given by the Wordle game, one character
            per letter of ``guess_word`` (case-insensitive):
            R = GRAY   - the answer has no (further) copy of this letter
            G = GREEN  - right letter in the right position
            Y = YELLOW - right letter in the wrong position
            Example: guess_feedback = 'RRRYR'
        filtered_words: The list of possible words.

    Returns:
        A new list holding the words from ``filtered_words`` that are still
        consistent with the feedback, in their original order. The first ten
        of them are also printed.

    Raises:
        ValueError: If ``guess_word`` and ``guess_feedback`` differ in length
            or the feedback contains a character other than R, G or Y.

    Precondition: the filtered_words is a list of five letter words in lower case
    """
    guess = guess_word.lower()
    feedback = guess_feedback.upper()
    if len(guess) != len(feedback):
        raise ValueError(
            f"guess_word {guess_word!r} and guess_feedback "
            f"{guess_feedback!r} must have the same length")
    unknown_marks = set(feedback) - set('RGY')
    if unknown_marks:
        raise ValueError(
            f"guess_feedback may only contain R, G or Y, got "
            f"{guess_feedback!r}")

    in_place = []    # (index, letter): the word must have letter at index
    misplaced = []   # (index, letter): the word must NOT have letter at index
    confirmed = {}   # letter -> number of green/yellow marks it received
    capped = set()   # letters that received at least one gray mark

    for idx, (letter, mark) in enumerate(zip(guess, feedback)):
        if mark == 'G':
            in_place.append((idx, letter))
            confirmed[letter] = confirmed.get(letter, 0) + 1
        elif mark == 'Y':
            misplaced.append((idx, letter))
            confirmed[letter] = confirmed.get(letter, 0) + 1
        else:
            # Gray: the letter is not at this position. Whether it is absent
            # altogether depends on its other marks, handled via `capped`.
            misplaced.append((idx, letter))
            capped.add(letter)

    def is_consistent(word):
        """Return True if word could have produced this feedback."""
        if any(word[idx] != letter for idx, letter in in_place):
            return False
        if any(word[idx] == letter for idx, letter in misplaced):
            return False
        for letter in confirmed.keys() | capped:
            occurrences = word.count(letter)
            required = confirmed.get(letter, 0)
            if letter in capped:
                # A gray mark means there are no copies beyond the
                # green/yellow ones, so the count is known exactly (zero when
                # the letter was only ever gray). Treating the letter as
                # forbidden outright would discard the real answer whenever
                # the guess repeats a letter.
                if occurrences != required:
                    return False
            elif occurrences < required:
                return False
        return True

    remaining = [word for word in filtered_words if is_consistent(word)]
    print("The top ten guess words: ", remaining[:10])
    return remaining



The total five letter words:  39933
The top ten most frequent five letter word ['about', 'other', 'which', 'their', 'there', 'first', 'would', 'these', 'click', 'price']
The top ten initial guess words:  ['about', 'email', 'video', 'years', 'today', 'house', 'media', 'guide', 'image', 'money']


In [None]:
# Feedback for 'guide': g, e are yellow; u, i, d are gray.
guess_word = 'guide'
guess_feedback = 'YRRRY'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['pages', 'legal', 'agent', 'vegas', 'began', 'angel', 'egypt', 'roger', 'omega', 'wages']


In [None]:
# Feedback for 'omega': o, m are gray; e, g, a are yellow.
guess_word = 'omega'
guess_feedback = 'RRYYY'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['pages', 'legal', 'vegas', 'began', 'angel', 'wages', 'anger', 'eager', 'vegan', 'agnes']


In [None]:
# Feedback for 'angel': a is yellow; n, l are gray; g, e are green.
guess_word = 'angel'
guess_feedback = 'YRGGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['pages', 'wages', 'eager', 'cages', 'pager', 'wager', 'sages', 'sager', 'hager', 'paget']


In [None]:
# Feedback for 'pages': p, s are gray; a, g, e are green.
guess_word = 'pages'
guess_feedback = 'RGGGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['eager', 'wager', 'hager', 'jager', 'yager', 'cagey', 'jagex', 'rager', 'cager', 'tager']


In [None]:
# Feedback for 'eager': all five letters are green.
guess_word = 'eager'
guess_feedback = 'GGGGG'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

In [None]:
# Feedback for 'house': o is green; h, u, s, e are gray.
guess_word = 'house'
guess_feedback = 'RGRRR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['world', 'local', 'today', 'total', 'board', 'point', 'going', 'topic', 'login', 'color']


In [None]:
# Feedback for 'world': o is green; w, r, l, d are gray.
guess_word = 'world'
guess_feedback = 'RGRRR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['point', 'going', 'topic', 'nokia', 'joint', 'comic', 'booty', 'tokyo', 'gonna', 'combo']


In [None]:
# Feedback for 'point': o is green; n is yellow; p, i, t are gray.
guess_word = 'point'
guess_feedback = 'RGRYR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['congo', 'conan', 'novak', 'gonzo', 'bongo', 'monza', 'jovan', 'conga', 'kogan', 'mongo']


In [None]:
# Feedback for 'congo': c is gray; o, n, g, o are green.
guess_word = 'congo'
guess_feedback = 'RGGGG'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['bongo', 'mongo', 'kongo', 'vongo', 'zongo']


In [None]:
# Feedback for 'bongo': all five letters are green; solve() is not called here.
guess_word = 'bongo'
guess_feedback = 'GGGGG'

In [None]:
# Feedback for 'money': e is green; m, o, n, y are gray.
guess_word = 'money'
guess_feedback = 'RRRGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['after', 'based', 'pages', 'three', 'water', 'sites', 'level', 'sales', 'rates', 'paper']


In [None]:
# Feedback for 'after': e is green; a, f, t, r are gray.
guess_word = 'after'
guess_feedback = 'RRRGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['level', 'speed', 'gives', 'lives', 'sleep', 'devel', 'wheel', 'sizes', 'blues', 'lived']


In [None]:
# Feedback for 'gives': i, e are green; g, v, s are gray.
guess_word = 'gives'
guess_feedback = 'RGRGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['liked', 'pixel', 'wiped', 'libel', 'diced', 'piled', 'piped', 'kihei', 'hiked', 'pipex']


In [None]:
# Feedback for 'pixel': p is yellow; i, e are green; x, l are gray.
guess_word = 'pixel'
guess_feedback = 'YGRGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['wiped', 'bicep', 'biped', 'ziped', 'diped', 'cipec']


In [None]:
# Feedback for 'wiped': i, e are green; p is yellow; w, d are gray.
guess_word = 'wiped'
guess_feedback = 'RGYGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['bicep']


In [None]:
# Feedback for 'adieu': d is yellow; e is green; a, i, u are gray.
guess_word = 'adieu'
guess_feedback = 'RYRGR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['order', 'model', 'speed', 'older', 'moved', 'owned', 'codes', 'ended', 'noted', 'devel']


In [None]:
# Feedback for 'model': m is gray; o, e, l are green; d is yellow.
guess_word = 'model'
guess_feedback = 'RGYGG'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['dowel', 'dotel', 'dorel', 'doyel']


In [None]:
# Feedback for 'adieu' as written: a is gray; d, e, u are green; i is yellow.
# NOTE(review): this feedback string is identical to the one used for 'model'
# in the preceding cell - confirm it was really meant for 'adieu'.
guess_word = 'adieu'
guess_feedback = 'RGYGG'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

In [None]:
guess_word = 'audio'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: a is yellow; u, d, i, o are gray.
guess_feedback = 'YRRRR'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['state', 'years', 'games', 'great', 'black', 'pages', 'place', 'water', 'small', 'shall']


In [None]:
# Feedback for 'games': a, e, s are yellow; g, m are gray.
guess_word = 'games'
guess_feedback = 'RYRYY'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['state', 'space', 'share', 'least', 'scale', 'phase', 'speak', 'shape', 'lease', 'beast']


In [None]:
guess_word = 'house'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: all five letters are gray.
guess_feedback = 'RRRRR'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['click', 'black', 'print', 'again', 'april', 'party', 'track', 'david', 'daily', 'final']


In [None]:
guess_word = 'space'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: s, e are yellow; a is green; p, c are gray.
guess_feedback = 'YRGRY'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['least', 'beast', 'yeast', 'feast', 'leash', 'keast', 'esata', 'leask', 'erast', 'weast']


In [None]:
guess_word = 'least'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: l is gray; e, a, s, t are green.
guess_feedback = 'RGGGG'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['beast', 'yeast', 'feast', 'keast', 'weast', 'reast']


In [None]:
guess_word = 'beast'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: b is gray; e, a, s, t are green.
guess_feedback = 'RGGGG'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['yeast', 'feast', 'keast', 'weast', 'reast']


In [None]:
# Feedback for 'click': l is yellow; c, i, c, k are gray.
guess_word = 'click'
guess_feedback = 'RYRRR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['apply', 'larry', 'malta', 'rally', 'naval', 'fatal', 'dylan', 'badly', 'natal', 'palma']


In [None]:
# Feedback for 'apply': a, l are yellow; both p's are gray; y is green.
guess_word = 'apply'
guess_feedback = 'YRRYG'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['larry', 'malay', 'lanny', 'baldy', 'balmy', 'dalby', 'laffy', 'landy', 'malty', 'lawry']


In [None]:
# Feedback for 'baldy': b, a, l, y are green; d is gray.
guess_word = 'baldy'
guess_feedback = 'GGGRG'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['balmy', 'balay']


In [None]:
# Five guesses in one cell; each solve() call narrows the list left by the
# previous one.
guess_word = 'about'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: u is yellow; a, b, o, t are gray.
guess_feedback = 'RRRYR'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)

# Feedback for 'music': u, s are yellow; m, i, c are gray.
guess_word = 'music'
guess_feedback = 'RYYRR'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

# Feedback for 'users': u and both s's are yellow; e, r are gray.
guess_word = 'users'
guess_feedback = 'YYRRY'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

# Feedback for 'slush': s, u, s, h are green; l is gray.
guess_word = 'slush'
guess_feedback = 'GRGGG'
filtered_words = solve(guess_word, guess_feedback, filtered_words)

# Feedback for 'shush': all five letters are green; solve() is not called.
guess_word = 'shush'
guess_feedback = 'GGGGG' #DONE

In [None]:
guess_word = 'adieu'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: a is yellow; d, i, e, u are gray.
guess_feedback = 'YRRRR'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)


The top ten guess words:  ['local', 'black', 'total', 'small', 'shall', 'class', 'start', 'march', 'yahoo', 'staff']


In [None]:
guess_word = 'local'
# Feedback legend (one character per letter of guess_word):
# R = GRAY - forbidden letters
# G = GREEN - right letter in the right position - in_place
# Y = YELLOW - right letter in the wrong position - contains and not_in
# Here: both l's are gray; o, a are yellow; c is green.
guess_feedback = 'RYGYR'
# Update guess_word and guess_feedback, then call solve() again, until the
# feedback is all green.

filtered_words = solve(guess_word, guess_feedback, filtered_words)

The top ten guess words:  ['macro', 'jacob', 'bacon', 'macon', 'macos', 'facto', 'wacom', 'macho', 'tacos', 'nacho']
