In [4]:
# Dependencies 
import numpy as np
from english_words import english_words_set

#### Environment

In [5]:
class WordleSingle(object):
    """Single-board Wordle environment with a gym-style ``reset``/``step`` interface.

    The board is stored as two ``(n_guesses, n_letters)`` int8 grids:

    * ``letters`` -- integer code of each guessed letter (a=0 ... z=25), -1 when empty.
    * ``colors``  -- feedback per letter: 2 green, 1 yellow, 0 gray, -1 when empty.

    Actions are integer indices into ``valid_words``.
    """

    # Letter -> integer code lookup (a=0 ... z=25), built once instead of on every encode call.
    encoder = {letter: code for code, letter in enumerate("abcdefghijklmnopqrstuvwxyz")}

    def __init__(self,
                 n_letters: int,
                 n_guesses: int,
                 valid_words: list = None,
                 answer: str = None,
                 seed: int = None):
        """Build the environment and start the first episode.

        Args:
            n_letters: Length of every word on the board.
            n_guesses: Number of rows (guesses) available per episode.
            valid_words: Guessable words; when None the list is built from
                ``english_words_set``, keeping lowercased words of the right
                length that consist only of the letters a-z.
            answer: Fixed answer for the first episode; drawn at random from
                ``valid_words`` when None.
            seed: When not None, seeds numpy's global random generator.

        Raises:
            AssertionError: If ``valid_words`` is not a list, or ``answer`` is
                not a string of length ``n_letters``.
        """

        # Store attributes
        self.n_letters = n_letters
        self.n_guesses = n_guesses
        self.board_dims = (self.n_guesses, self.n_letters)
        self.max_score = 2 * self.n_letters

        # Only touch the global generator when a seed was requested; seeding with
        # None would silently discard any seed set earlier in the notebook.
        self.seed = seed
        if self.seed is not None:
            np.random.seed(self.seed)

        # Valid words
        if valid_words is None:
            # Keep only words _encode can handle (a-z); this also drops entries
            # containing apostrophes or periods.
            alphabet = set(self.encoder)
            lowered = (word.lower() for word in english_words_set if len(word) == self.n_letters)
            self.valid_words = sorted(word for word in lowered if set(word) <= alphabet)
        else:
            assert isinstance(valid_words, list), "valid_words must be a list"
            self.valid_words = valid_words

        # Action space: one action per valid word
        self.action_space = list(range(len(self.valid_words)))

        # Observation space: np.ndarray(shape) allocates without initialising, so
        # these arrays only describe shape and dtype.
        self.observation_space = {'letters': np.ndarray(self.board_dims, dtype = int),
                                  'colors': np.ndarray(self.board_dims, dtype = int)}

        # Board, counters and answer are set up by the same code path as later episodes
        self.reset(answer = answer)

    def _encode(self, word: str):
        """Return the list of integer letter codes for ``word``.

        Raises:
            KeyError: If ``word`` contains a character outside lowercase a-z.
        """
        return [self.encoder[letter] for letter in word]

    def step(self, action: int):

        """
        Takes the action as index from list of actions and converts to words/list of letter-numbers
        and then updates the board
        {'green': 2,
         'yellow': 1,
         'gray': 0,
         'empty': -1}

        Returns the tuple (state, reward, done, info), where reward is 1 for a
        fully correct guess and -1 otherwise, and info is {'won': bool}.
        """

        # Grab action
        decoded_action = self.valid_words[action]
        encoded_action = self._encode(decoded_action)

        # Update letters grid
        self.letters[self.guess_count] = encoded_action

        # Update colors
        # NOTE(review): a guessed letter is marked yellow whenever it occurs anywhere
        # in the answer, even if every occurrence is already matched; official Wordle
        # treats repeated letters more strictly. Left unchanged so existing
        # observations keep their meaning.
        new_colors = []
        for answer_letter, guess_letter in zip(self.encoded_answer, encoded_action):

            # Green letter (i.e. correct letter in correct spot)
            if guess_letter == answer_letter:
                new_colors.append(2)

            # Yellow letter (i.e. correct letter in incorrect spot)
            elif guess_letter in self.encoded_answer:
                new_colors.append(1)

            # Gray letter (i.e. incorrect letter)
            else:
                new_colors.append(0)

        self.colors[self.guess_count] = new_colors

        # Update state
        self.state = {'letters': self.letters, 'colors': self.colors}

        # Get reward (must happen before the guess count moves past the row just filled)
        reward = self.compute_reward()

        # Increment guess count
        self.guess_count += 1

        # Check if done and/or won
        won = reward > 0
        self.info = {'won': won}
        self.done = (self.guess_count == self.n_guesses) or (won)

        return self.state, reward, self.done, self.info

    def compute_reward(self):

        """
        Because green is 2, the max score is 2 x number of letters
        We give a score of -1 for any bad guesses

        Reads the colors row at index ``guess_count``, so it scores the latest
        guess only when called before ``guess_count`` is incremented (as ``step`` does).
        """
        score = np.sum(self.colors[self.guess_count])
        self.max_score = 2 * self.n_letters
        reward = 1 if score == self.max_score else -1

        return reward

    def reset(self, answer: str = None):
        """Clear the board and start a new episode.

        Args:
            answer: Answer for the new episode; drawn at random from
                ``valid_words`` (numpy global generator) when None.

        Returns:
            The fresh state dict with 'letters' and 'colors' grids filled with -1.

        Raises:
            AssertionError: If ``answer`` is not a string of length ``n_letters``.
        """

        # Initialize state
        self.letters = np.full(self.board_dims, -1, dtype = np.int8)
        self.colors = np.full(self.board_dims, -1, dtype = np.int8)
        self.state = {'letters': self.letters, 'colors': self.colors}

        # Info variables
        self.done = False
        self.guess_count = 0
        self.info = {'won': False}

        # Select new answer
        if answer is not None:
            assert isinstance(answer, str), "answer must be a string"
            assert len(answer) == self.n_letters, "answer must have n_letters characters"
            self.decoded_answer = answer
        else:
            self.decoded_answer = np.random.choice(self.valid_words)

        # Keep the public alias in sync for both branches (it used to go stale
        # when an explicit answer was passed to reset).
        self.answer = self.decoded_answer
        self.encoded_answer = self._encode(self.decoded_answer)

        return self.state

In [21]:
class WordleMulti():
    """Several Wordle boards played simultaneously with one shared guess per step.

    Each board is a ``WordleSingle`` with its own answer; every ``step`` applies
    the same action (an index into ``valid_words``) to all boards that are not
    finished yet. The meta state is the list of the boards' state dicts.
    """

    def __init__(self, n_boards: int, n_letters: int, n_guesses: int, answers: list, seed: int):
        """Build the boards and start the first episode.

        Args:
            n_boards: Number of boards played in parallel.
            n_letters: Length of every word.
            n_guesses: Number of guesses available on each board.
            answers: One answer per board, or None to draw them at random
                from ``valid_words``.
            seed: Seed for the private generator used to draw answers (None for
                a non-deterministic draw); it is also forwarded to every board.

        Raises:
            AssertionError: If ``answers`` is given but is not a list of
                ``n_boards`` words of length ``n_letters``.
        """

        # Store attributes
        self.n_boards = n_boards
        self.n_letters = n_letters
        self.n_guesses = n_guesses
        self.seed = seed
        self.board_dims = (self.n_guesses, self.n_letters)

        # Private generator: each WordleSingle constructor may reseed numpy's global
        # generator, which previously made every reset() redraw the same answers and
        # left the initial answers unseeded.
        self._rng = np.random.RandomState(self.seed)

        # Valid words: only lowercase a-z words can be encoded by the boards, so
        # entries with apostrophes, periods or other symbols are dropped.
        alphabet = set("abcdefghijklmnopqrstuvwxyz")
        lowered = (word.lower() for word in english_words_set if len(word) == self.n_letters)
        self.valid_words = sorted(word for word in lowered if set(word) <= alphabet)

        # Create answers
        if answers is not None:
            self._validate_answers(answers)
            self.answers = answers
        else:
            self.answers = self._sample_answers()

        # Create boards (also sets state, guess_count, done and info)
        self._build_boards()

        # Action space
        self.action_space = list(range(len(self.valid_words)))

        # Observation space
        self.observation_space = [board.observation_space for board in self.boards]

    def _validate_answers(self, answers):
        """Assert that ``answers`` is a list of ``n_boards`` words of length ``n_letters``."""
        assert isinstance(answers, list), "answers must be a list"
        assert len(answers) == self.n_boards, "need exactly one answer per board"
        # all(...) is required here: asserting the bare list is always true when it is non-empty
        assert all(len(answer) == self.n_letters for answer in answers), \
            "every answer must have n_letters characters"

    def _sample_answers(self):
        """Draw one answer per board (with replacement) from ``valid_words``."""
        return self._rng.choice(self.valid_words, self.n_boards).tolist()

    def _build_boards(self):
        """Create one fresh board per entry of ``self.answers`` and reset episode bookkeeping."""
        self.boards = [WordleSingle(n_letters = self.n_letters,
                                    n_guesses = self.n_guesses,
                                    valid_words = self.valid_words,
                                    answer = board_answer,
                                    seed = self.seed)
                       for board_answer in self.answers]

        # Create meta state
        self.state = [board.state for board in self.boards]

        # Info variables
        self.guess_count = 0
        self.done = False
        self.info = {'won': False}

    @staticmethod
    def _is_solved(board):
        """Return True when the last guess played on ``board`` is entirely green."""
        if board.guess_count == 0:
            return False
        return bool(np.all(board.colors[board.guess_count - 1] == 2))

    def step(self, action: int):

        """
        Takes the action as index from list of actions and converts to words/list of letter-numbers
        and then updates each of the boards
        {'green': 2,
         'yellow': 1,
         'gray': 0,
         'empty': -1}

        Returns the tuple (state, reward, done, info). The reward adds +1 for every
        solved board and -1 for every unsolved one; info is {'won': bool}, true
        once all boards are solved.
        """

        # Update each board that is still in play
        for board in self.boards:
            if not board.done:
                board.step(action)

        # Update state
        self.state = [board.state for board in self.boards]

        # Compute reward from each board's last played row. Calling
        # board.compute_reward() here would read the row *after* the guess just
        # made (the board has already advanced its guess_count) and would index
        # past the grid on the final guess.
        rewards_list = [1 if self._is_solved(board) else -1 for board in self.boards]
        reward = np.sum(rewards_list)

        # Increment guess counts
        self.guess_count = np.max([board.guess_count for board in self.boards])

        # Check if done and/or won
        won = reward >= self.n_boards
        self.info = {'won': won}
        self.done = (self.guess_count == self.n_guesses) or (won)

        return self.state, reward, self.done, self.info

    def reset(self, answers: list = None):
        """Start a new episode on fresh boards.

        Args:
            answers: Optional list with one answer per board; when None new
                answers are drawn at random from ``valid_words``.

        Returns:
            The new meta state (list of per-board state dicts).

        Raises:
            AssertionError: If ``answers`` is given but is not a list of
                ``n_boards`` words of length ``n_letters``.
        """

        # Create answers
        if answers is not None:
            self._validate_answers(answers)
            self.answers = answers
        else:
            self.answers = self._sample_answers()

        # Create boards and meta state
        self._build_boards()

        return self.state

#### Sanity Check

In [22]:
# Sanity-check instance: four 5-letter boards with 9 guesses each;
# answers=None lets the environment pick the answers, and no seed is fixed.
env = WordleMulti(
    n_boards=4,
    n_letters=5,
    n_guesses=9,
    answers=None,
    seed=None,
)

In [23]:
# Draw one entry of the action space at random and look it up by index;
# the bare last expression is shown as the cell output.
sampled_index = np.random.choice(env.action_space)
env.action_space[sampled_index]

1314