In [3]:
# Dependencies 
import numpy as np
from english_words import english_words_set
import gym

#### Environment

In [4]:
class WordleSingle(gym.Env):
    """
    Single-board Wordle environment.

    An action is an index into ``valid_words``. The observation is a dict with two
    ``(n_guesses, n_letters)`` integer grids:

    * ``'letters'`` -- letter codes of the guesses made so far (a=0 ... z=25, -1 = empty)
    * ``'colors'``  -- feedback per letter (2 = green, 1 = yellow, 0 = gray, -1 = empty)

    Note: a guessed letter is marked yellow whenever it occurs anywhere in the answer;
    repeated letters are not count-limited.

    Parameters
    ----------
    n_letters : int
        Length of every word on the board.
    n_guesses : int
        Maximum number of guesses (rows of the board).
    valid_words : list, optional
        Words that may be guessed / drawn as answer. When omitted, the list is built
        from ``english_words_set``.
    answer : str, optional
        Fixed answer for the first episode; drawn at random from ``valid_words`` if omitted.
    seed : int, optional
        When given, seeds NumPy's global RNG. When omitted the RNG is left untouched.
    """

    # Letter -> integer code (a=0 ... z=25); built once and shared by all instances
    encoder = {letter: code for code, letter in enumerate("abcdefghijklmnopqrstuvwxyz")}

    def __init__(self,
                 n_letters: int,
                 n_guesses: int,
                 valid_words: list = None,
                 answer: str = None,
                 seed: int = None):

        # Store attributes
        self.n_letters = n_letters
        self.n_guesses = n_guesses
        self.board_dims = (self.n_guesses, self.n_letters)

        # NOTE(review): this attribute may shadow a ``seed()`` method on gym.Env in
        # gym versions that define one -- kept because callers may read ``env.seed``.
        self.seed = seed
        # Only touch the global RNG when a seed was actually requested; seeding with
        # None would re-randomise it and discard any seeding done by the caller.
        if self.seed is not None:
            np.random.seed(self.seed)

        # Valid words
        if valid_words is None:
            # Keep only words made purely of a-z (anything else cannot be encoded),
            # drop duplicates created by lower-casing, and sort so that the
            # index -> word mapping does not depend on the iteration order of
            # english_words_set (presumably a set, whose order is not stable).
            lowered = {word.lower() for word in english_words_set}
            self.valid_words = sorted(
                word for word in lowered
                if len(word) == self.n_letters
                and all(letter in self.encoder for letter in word)
            )
        else:
            assert isinstance(valid_words, list)
            assert all(len(word) == self.n_letters for word in valid_words)
            self.valid_words = valid_words

        # Action space: one action per valid word
        self.action_space = gym.spaces.Discrete(len(self.valid_words))

        # Observation space (letters are coded -1..25, colors only -1..2)
        self.observation_space = gym.spaces.Dict({
            'letters': gym.spaces.Box(low=-1, high=25, shape=self.board_dims, dtype=int),
            'colors': gym.spaces.Box(low=-1, high=2, shape=self.board_dims, dtype=int)
        })

        # Initialize state and pick the answer
        self._clear_board()
        self._select_answer(answer)

    def _clear_board(self):
        """Reset both grids to 'empty' (-1) and clear the episode bookkeeping."""
        self.letters = np.full(self.board_dims, -1, dtype=np.int8)
        self.colors = np.full(self.board_dims, -1, dtype=np.int8)
        self.state = {'letters': self.letters, 'colors': self.colors}

        # Info variables
        self.done = False
        self.guess_count = 0
        self.info = {'won': False}

    def _select_answer(self, answer: str = None):
        """Store the given answer, or draw one at random from ``valid_words``."""
        if answer is not None:
            assert isinstance(answer, str)
            assert len(answer) == self.n_letters
            self.decoded_answer = answer
        else:
            self.decoded_answer = np.random.choice(self.valid_words)

        self.encoded_answer = self._encode(self.decoded_answer)

    def _encode(self, word: str):
        """
        Convert a word to its list of letter codes (a=0 ... z=25).

        Raises KeyError if the word contains a character outside a-z.
        """
        return [self.encoder[letter] for letter in word]

    def step(self, action: int):

        """
        Takes the action as index from list of actions and converts to words/list of letter-numbers
        and then updates the board
        {'green': 2,
         'yellow': 1,
         'gray': 0,
         'empty': -1}

        Returns (state, reward, done, info); ``info['won']`` tells whether this guess
        matched the answer. Raises IndexError when every row is already filled.
        """

        # Same exception type the grid assignment would raise, with a clearer message
        if self.guess_count >= self.n_guesses:
            raise IndexError("All guesses have been used; call reset() before stepping again.")

        # Grab action
        decoded_action = self.valid_words[action]
        encoded_action = self._encode(decoded_action)

        # Update letters grid
        self.letters[self.guess_count] = encoded_action

        # Update colors
        new_colors = []
        for answer_letter, guess_letter in zip(self.encoded_answer, encoded_action):

            # Green letter (i.e. correct letter in correct spot)
            if guess_letter == answer_letter:
                new_colors.append(2)

            # Yellow letter (i.e. correct letter in incorrect spot)
            elif guess_letter in self.encoded_answer:
                new_colors.append(1)

            # Gray letter (i.e. incorrect letter)
            else:
                new_colors.append(0)

        self.colors[self.guess_count] = new_colors

        # Update state
        self.state = {'letters': self.letters, 'colors': self.colors}

        # Get reward (must happen before guess_count moves to the next row)
        reward = self.compute_reward()

        # Increment guess count
        self.guess_count += 1

        # Check if done and/or won
        won = reward > 0
        self.info = {'won': won}
        self.done = (self.guess_count == self.n_guesses) or (won)

        return self.state, reward, self.done, self.info

    def compute_reward(self):

        """
        Because green is 2, the max score is 2 x number of letters
        We give a score of -1 for any bad guesses

        Returns 1 when the row at ``guess_count`` is entirely green, otherwise -1.
        """
        score = np.sum(self.colors[self.guess_count])
        self.max_score = 2 * self.n_letters
        reward = 1 if score == self.max_score else -1

        return reward

    def reset(self, answer: str = None):
        """
        Start a new episode: clear the board and select a new answer.

        ``answer`` fixes the answer for the episode; when omitted one is drawn at
        random from ``valid_words``. Returns the fresh (empty) state.
        """

        # Initialize state
        self._clear_board()

        # Select new answer
        self._select_answer(answer)

        return self.state

In [5]:
class WordleMulti(gym.Env):
    """
    Several Wordle boards played in lockstep: every guess is applied to all boards.

    Parameters
    ----------
    n_boards : int
        Number of boards (must be at least 1).
    n_letters : int
        Length of every word.
    n_guesses : int
        Maximum number of guesses per board.
    answers : list, optional
        One answer per board. When omitted, answers are drawn at random from the
        valid word list.
    seed : int, optional
        When given, seeds NumPy's global RNG before answers are drawn.
    """

    def __init__(self, n_boards: int, n_letters: int, n_guesses: int, answers: list = None, seed: int = None):

        # Store attributes
        self.n_boards = n_boards
        self.n_letters = n_letters
        self.n_guesses = n_guesses
        self.seed = seed
        self.board_dims = (self.n_guesses, self.n_letters)

        if self.n_boards < 1:
            raise ValueError("n_boards must be at least 1")

        # Validate answers up front
        if answers is not None:
            assert isinstance(answers, list)
            assert len(answers) == self.n_boards
            assert all(len(answer) == self.n_letters for answer in answers)

        # Create boards. Each board derives its own word list from the shared
        # dictionary; the lists are then unified below so that one action index
        # means the same word on every board.
        self.boards = [
            WordleSingle(n_letters=self.n_letters,
                         n_guesses=self.n_guesses,
                         valid_words=None,
                         answer=None,
                         seed=None)
            for _ in range(self.n_boards)
        ]

        # Valid words (taken from the boards instead of an unrelated global env)
        self.valid_words = self.boards[0].valid_words
        for board in self.boards[1:]:
            board.valid_words = self.valid_words

        # Seed only now: constructing the boards above touches the global NumPy RNG,
        # so seeding earlier would not make the answers reproducible.
        if self.seed is not None:
            np.random.seed(self.seed)

        # Create answers
        if answers is None:
            answers = np.random.choice(self.valid_words, self.n_boards).tolist()
        self.answers = answers

        # Hand every board its answer (also leaves each board freshly cleared)
        for board, answer in zip(self.boards, self.answers):
            board.reset(answer=answer)

        # Action space
        self.action_space = gym.spaces.Discrete(len(self.valid_words))

        # Observation space: one Dict space per board (colors only range over -1..2)
        self.observation_space = [gym.spaces.Dict({
            'letters': gym.spaces.Box(low=-1, high=25, shape=self.board_dims, dtype=int),
            'colors': gym.spaces.Box(low=-1, high=2, shape=self.board_dims, dtype=int)
        }) for _ in range(self.n_boards)]

        # Create meta state
        self.state = [board.state for board in self.boards]

    def step(self, action: int):

        """
        Takes the action as index from list of actions and converts to words/list of letter-numbers
        and then updates each of the boards
        {'green': 2,
         'yellow': 1,
         'gray': 0,
         'empty': -1}

        Returns a list with one (state, reward, done, info) tuple per board.
        Boards that are already finished are left untouched and report a reward of 0,
        so a solved board is not overwritten by later guesses.
        """

        # Update each board
        logs = []
        for board in self.boards:
            if board.done:
                # board.info was set by the step that finished this board
                logs.append((board.state, 0, True, board.info))
            else:
                logs.append(board.step(action))

        # Boards replace their state dict on every step, so refresh the meta state
        self.state = [board.state for board in self.boards]

        return logs

    def reset(self):
        """
        Reset every board with a newly drawn random answer.

        Returns the list of per-board states.
        """

        for board in self.boards:
            board.reset()

        # Keep the bookkeeping in sync with the boards' new answers and arrays
        self.answers = [board.decoded_answer for board in self.boards]
        self.state = [board.state for board in self.boards]

        return self.state

#### Check Env

In [None]:
from stable_baselines3.common.env_checker import check_env

In [215]:
# Single-board environment: 5-letter words, 6 guesses, default word list,
# random answer, RNG seeded with 88.
env = WordleSingle(n_letters=5, n_guesses=6, valid_words=None, answer=None, seed=88)

In [222]:
# Smoke test: play five episodes with randomly sampled guesses and report the
# cumulative reward of each one.
for i in range(5):
    state = env.reset()
    done = False
    won = False
    score = 0
    print(env.decoded_answer)
    while not done:

        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        score += reward
        # Track the outcome reported by the environment (was initialised but never updated)
        won = info['won']

    print(f'Episode {i} -- Score {score}')

bushy
Episode 0 -- Score -4
troll
Episode 1 -- Score -6
latex
Episode 2 -- Score -6
leggy
Episode 3 -- Score -6
sprig
Episode 4 -- Score -6
