# Wordle

In [1]:
from datetime import datetime
import os
import random

import openai

openai.api_key = os.environ["OPENAI_API_KEY"]  # Set the OpenAI key

## Utility functions

In [2]:
# Maps a tile score (as produced by Wordle.evaluate_guess) to the emoji
# that is shown for that tile.
result2emoji = {
    -1: "⬜️",  # letter not yet evaluated
    0: "⬛️",  # letter not in word
    1: "🟨",  # letter in word, not properly placed
    2: "🟩",  # letter properly placed
}


def query_gpt(messages, model: str):
    """Send the chat history to an OpenAI chat model and return its reply.

    Args:
        messages: the conversation so far, passed to the API unchanged.
        model: name of the chat model to query.

    Returns:
        The "content" of the message in the first choice of the response.
    """
    completion = openai.ChatCompletion.create(model=model, messages=messages)
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]


def update_messages(messages: list, assistant_msg: str, user_msg: str):
    """Append one assistant/user exchange to the GPT message history.

    The list is modified in place; the same list object is returned for
    convenience.
    """
    exchange = (("assistant", assistant_msg), ("user", user_msg))
    for role, content in exchange:
        messages.append({"role": role, "content": content})
    return messages


def extract_guess(answer: str) -> str:
    """Extract the wordle guess from the last line (as prompted).

    The model is asked to finish its answer with the guess alone on the last
    line, but in practice the reply may end with blank lines or wrap the word
    in whitespace, quotes or punctuation. Those are tolerated here so that a
    well-meant guess is not rejected for formatting reasons.

    Args:
        answer: the full text returned by the model.

    Returns:
        The last non-empty line, stripped of surrounding whitespace and
        punctuation and upper-cased. An empty string if the answer contains
        no non-blank line.
    """
    # characters that sometimes surround the guess and are never part of it
    noise = " \t.,;:!?\"'`*"

    # drop blank lines so a trailing newline does not yield an empty guess
    lines = [line.strip() for line in answer.splitlines()]
    lines = [line for line in lines if line]
    if not lines:
        return ""

    # remove periods, quotes and the like, happens sometimes...
    guess = lines[-1].strip(noise)

    # uppercase the answer
    return guess.upper()


def load_words(filepath):
    """Read a .txt word list (one word per line) and return its 5-letter words.

    Each line is stripped of surrounding whitespace and upper-cased; entries
    that are not exactly 5 characters long afterwards are dropped. File order
    and duplicates are preserved.
    """
    with open(filepath, "r") as f:
        cleaned = (raw.strip().upper() for raw in f)
        return [candidate for candidate in cleaned if len(candidate) == 5]


def results_to_emojis(results):
    """Render a sequence of tile scores as a space-separated row of emojis."""
    tiles = map(result2emoji.__getitem__, results)
    return " ".join(tiles)


## Define the WORDLE game

In [3]:
class Wordle:
    """A single game of Wordle against a fixed secret word.

    Code adapted from: https://github.com/hrishikeshrt/python-wordle/blob/master/wordle/wordle.py.

    Found issues in the logic, I cleaned it up and made the rules more like the official wordle.

    State is kept in plain attributes: ``word`` (upper-cased secret),
    ``valid_words`` (accepted guesses), ``num_attempts`` and
    ``attempted_words`` (valid guesses played so far), and the ``solved`` /
    ``failed`` flags.
    """

    def __init__(self, word: str = None, valid_wordfile: str = "vocabulary.txt"):
        """Set up a new game.

        Args:
            word: the secret word to guess (case-insensitive). Required.
            valid_wordfile: path of the text file listing all accepted words,
                read with ``load_words``.

        Raises:
            ValueError: if no word is given.
            AssertionError: if the word is not in the vocabulary file.
        """
        if word is None:
            raise ValueError("A secret word is required to start a game")

        self.word_length = 5
        self.max_attempts = 6
        self.word = word.upper()

        # List of all valid english words
        self.valid_words = load_words(valid_wordfile)

        # raised explicitly (rather than `assert`) so the check also runs
        # under `python -O`; the exception type is kept for existing callers
        if self.word not in self.valid_words:
            raise AssertionError(f"word {word} not in vocabulary")

        self.num_attempts = 0
        self.attempted_words = []
        self.solved = False
        self.failed = False

    def evaluate_guess(self, word: str, guess: str) -> list:
        """Implements the WORDLE logic.

        results is a list of ints (one per letter of ``word``), indicating the
        tile value.

        -1: letter not yet evaluated
        0: letter not in word
        1: letter in word, not properly placed
        2: letter properly placed

        example:

        word: apple
        guess: piped

        returns [1, 0, 2, 1, 0]

        ``guess`` is expected to have the same length as ``word``; ``play``
        checks this before calling.
        """
        guess = guess.upper()
        word = word.upper()
        size = len(word)

        if word == guess:
            return [2] * size

        # initialize result
        result = [-1] * size

        # initialize remaining letters
        remaining_letters = list(word)

        corrects_idx = [
            idx for idx, (w, g) in enumerate(zip(word, guess)) if w == g
        ]

        # validate all properly placed tiles first; pop from the back so the
        # earlier indices stay valid while removing
        for idx in sorted(corrects_idx, reverse=True):
            result[idx] = 2
            remaining_letters.pop(idx)

        # now go verify incorrectly placed letters...
        for idx, letter in enumerate(guess):
            if result[idx] == 2:
                # this was already validated, ignore
                continue

            if letter in remaining_letters:
                # incorrectly placed; consume it so repeated letters in the
                # guess are not credited more often than they occur
                result[idx] = 1
                remaining_letters.remove(letter)
            else:
                # not in word
                result[idx] = 0

        return result

    def play(self, guess: str):
        """Play one guess and update the game state.

        Args:
            guess: the guessed word (case-insensitive).

        Returns:
            A ``(result, message)`` tuple. For an accepted guess ``result`` is
            the tile list from ``evaluate_guess`` and ``message`` is
            "Solved" or "Next guess". For a rejected guess (game over, wrong
            length, unknown word, repeated word) ``result`` is ``[]`` and
            ``message`` explains why; rejected guesses do not count as
            attempts.

        Raises:
            AssertionError: if ``guess`` is not a str.
        """
        if not isinstance(guess, str):
            raise AssertionError("Only str guesses are supported")

        # upper-case
        guess = guess.upper()

        if self.solved:
            return [], "Game already solved!"

        if self.failed:
            return [], "Max attempts reached."

        if len(guess) != self.word_length:
            return (
                [],
                f"You can only use {self.word_length} letter words, try again",
            )

        if guess not in self.valid_words:
            return [], "Invalid word, try again"

        if guess in self.attempted_words:
            return [], "Word already guessed, try again."

        self.attempted_words.append(guess)

        result = self.evaluate_guess(word=self.word, guess=guess)
        self.num_attempts += 1

        if result == [2] * self.word_length:
            # solving on the last allowed attempt is a win, not a failure
            self.solved = True
            return result, "Solved"

        if self.num_attempts >= self.max_attempts:
            self.failed = True

        return result, "Next guess"

## Define the prompt

In [4]:
# System prompt sent as the first message of every game: explains the rules,
# the meaning of the tile emojis, and the required answer format (reasoning
# first, then the bare guess on the last line).
INSTRUCTIONS = """You are playing the game wordle.

# HOW TO PLAY
Guess the wordle in 6 tries.

* Each guess must be a valid 5-letter word.
* The color of the tiles will change to show how close your guess was to the word.

Examples:
WEARY: 🟩 ⬜ ⬜ ⬜ ⬜, W is in the word and in the correct spot.

PILLS: ⬜ 🟨 ⬜ ⬜ ⬜, I is in the word but in the wrong spot.

VAGUE: ⬜ ⬜ ⬜ ⬛️ ⬜, U is not in the word in any spot.

APPLE: 🟩 ⬛ ⬛ ⬛ ⬛, A is in the correct spot, P-P-L-E are not in the word.

Respond with your reasoning, step by step, motivating your choice. After your reasoning, respond, on a newline, only with your guess, where your guess is a valid 5 letter word and nothing else. 

For example:

Because I know that the A is in the first place, I will guess the word apple.
APPLE

Game begins.
⬜ ⬜ ⬜ ⬜ ⬜
"""


## Play the actual game

In [5]:
MODELS = ["gpt-3.5-turbo", "gpt-4"]
MAX_GAMES = 10
# Maximum number of model queries per game. This allows for incorrect guesses,
# which sometimes happens (e.g. invalid word, too many letters, etc.). Wordle
# itself is limited to 6 valid guesses.
MAX_ATTEMPS = 10
# Only use words that were used in previous wordle NYT games, taken from
# https://www.rockpapershotgun.com/wordle-past-answers
WORDS_FILE = "words.txt"
SEED = 42

# Load previous wordle words to play
words: list = load_words(WORDS_FILE)

# keep track of score here
games_solved = {model: 0 for model in MODELS}
num_attempts = {model: [] for model in MODELS}

print(
    f"This code was executed on {datetime.now()}"
)  # timestamp because GPT models change all the time...

random.seed(SEED)  # set seed for reproducibility

for game_count in range(MAX_GAMES):
    print(f"{'*'*80}")

    # pick random word; every model plays the same word
    word = random.choice(words)

    for model in MODELS:
        # initialize game
        game = Wordle(word=word)

        # initialize gpt prompt
        messages = [
            {"role": "system", "content": INSTRUCTIONS},
        ]

        # display the game info
        print("=" * 79)
        print(f"Word to guess: {game.word}")
        print(f"Using model: {model}")
        print("=" * 79)

        for attempt in range(MAX_ATTEMPS):
            # Only the API call is guarded: errors in the game logic should
            # surface instead of being reported as a connection problem.
            try:
                gpt_response = query_gpt(messages, model=model)
            except Exception as err:
                # probably a failed connection to openAI API; the failed
                # query still uses up one of the MAX_ATTEMPS
                print(f"Something went wrong ({err!r}), trying again...")
                continue

            gpt_guess = extract_guess(gpt_response)
            results, msg = game.play(gpt_guess)

            # a valid guess is answered with its tiles, a rejected one with
            # the reason it was rejected
            game_response = results_to_emojis(results) if results else msg

            print(f"[{model}] {gpt_response}")
            print(f"[user] {game_response}")

            # update prompt with previous exchanges
            messages = update_messages(messages, gpt_response, game_response)

            if game.solved:
                games_solved[model] += 1
                print("Game solved!")
                num_attempts[model].append(game.num_attempts)
                break

            if game.failed:
                print("Game failed, too many guesses.")
                break
        else:
            # all MAX_ATTEMPS queries were used without finishing the game
            print("Maximum attempts exceeded.")

        print(f"{'*'*80}\n")

This code was executed on 2023-03-26 17:58:21.880951
********************************************************************************
Word to guess: CHUNK
Using model: gpt-3.5-turbo
[gpt-3.5-turbo] I will start by guessing a common 5-letter English word that starts with a vowel, since vowels are generally more frequent in English words. I will guess the word 'FABLE'. 

FABLE
[user] ⬛️ ⬛️ ⬛️ ⬛️ ⬛️
[gpt-3.5-turbo] Based on the feedback, none of the letters in my guess appear in the actual wordle, and so I need to guess a different word. I will now guess a 5-letter word that starts with a consonant, since that is the remaining possibility. I will guess the word 'TRUCK'.

TRUCK
[user] ⬛️ ⬛️ 🟩 🟨 🟩
[gpt-3.5-turbo] Based on the feedback, one of the letters in my guess appears in the wordle, in the 3rd position, and another letter is in the wordle but in the wrong position. I will try a word that has the letter in the 3rd position and other letters that might fit in the other positions. One wo

[gpt-3.5-turbo] I apologize once more, I will guess the word "blend".
[user] You can only use 5 letter words, try again
[gpt-3.5-turbo] I am sorry, again. For my fourth try, I will guess the word "blunt".
[user] You can only use 5 letter words, try again
[gpt-3.5-turbo] I apologize for my mistake. For my fifth guess, I will go with the word "spark".
[user] You can only use 5 letter words, try again
[gpt-3.5-turbo] I'm sorry for the error. For my final guess, I will guess the word "cloud".
[user] You can only use 5 letter words, try again
[gpt-3.5-turbo] I apologize for the mistake. For my final guess, I will try the word "fable".
[user] You can only use 5 letter words, try again
[gpt-3.5-turbo] I apologize once again for my mistake. For my last try, I will have to go with the word "crust".
[user] You can only use 5 letter words, try again
Maximum attempts exceeded.
********************************************************************************

Word to guess: HERON
Using model: gpt-4


[gpt-3.5-turbo] I will start by guessing the word "hello".
HELLO
[user] ⬛️ 🟨 ⬛️ ⬛️ ⬛️
[gpt-3.5-turbo] My next guess will be "world".
WORLD
[user] ⬛️ ⬛️ 🟨 ⬛️ ⬛️
[gpt-3.5-turbo] My next guess will be "noise".
NOISE
[user] ⬛️ ⬛️ ⬛️ ⬛️ 🟩
[gpt-3.5-turbo] Great! I got one letter in the correct position.
Based on the letter I got right, I think the word could be "fable".
FABLE
[user] ⬛️ 🟨 ⬛️ ⬛️ 🟩
[gpt-3.5-turbo] Since I got the third letter "L" correct, I will try "flame".
FLAME
[user] ⬛️ ⬛️ 🟩 ⬛️ 🟩
[gpt-3.5-turbo] I've made two correct guesses now, looks like "above" could fit these criteria.
ABOVE
[user] 🟨 ⬛️ ⬛️ 🟩 🟩
Game failed, too many guesses.
********************************************************************************

Word to guess: CRAVE
Using model: gpt-4
[gpt-4] I will start with a common starting word to potentially reveal some vowels and consonants.
AHEAD
[user] 🟨 ⬛️ 🟨 ⬛️ ⬛️
[gpt-4] Since the A and E are in the word but in different positions, I will try to find the correct pos

[gpt-4] Since I have no information about the word yet, I will start with a common word that has a diverse set of letters.
TABLE
[user] ⬛️ ⬛️ 🟨 ⬛️ 🟩
[gpt-4] The third letter is E and the last letter is L, and both of these are in wrong positions. I will now choose a new word taking these into consideration.
FLAME
[user] ⬛️ ⬛️ ⬛️ ⬛️ 🟩
[gpt-4] There is only one correct letter in the word, and it is E in the last position. I will try a new word with different letters but keeping the E at the end.
SLATE
[user] ⬛️ ⬛️ ⬛️ ⬛️ 🟩
[gpt-4] Again, only the E in the last position is correct. I'll try another word with a different set of letters and keep the E at the end.
PRIDE
[user] ⬛️ 🟩 🟩 ⬛️ 🟩
[gpt-4] Now I know that R is the second letter and I is the third, and the last letter is still E. I will guess a word based on these positions.
PRICE
[user] ⬛️ 🟩 🟩 ⬛️ 🟩
[gpt-4] I guessed the second and third letters correctly, but the fourth letter is still incorrect. I will try another combination based on

## Print the statistics

In [6]:
# Per-model summary: games solved, games played and the solve rate in percent.
for model in MODELS:
    print()
    print(f"{model} summary:")
    solved = games_solved[model]
    print(f"Games solved: {solved}")
    print(f"Games played: {MAX_GAMES}")
    accuracy = solved / MAX_GAMES * 100
    print(f"Accuracy %: {accuracy:2.1f}")
    print()



gpt-3.5-turbo summary:
Games solved: 0
Games played: 10
Accuracy %: 0.0


gpt-4 summary:
Games solved: 1
Games played: 10
Accuracy %: 10.0

