In [1]:
!pip install tqdm



In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time
from IPython import display
import scipy.stats as stats
from tqdm import trange
from collections import defaultdict
from typing import NamedTuple

# Implemented methods
methods = ['DynProg', 'ValIter']

# Some colours
LIGHT_RED    = '#FFC4CC'
LIGHT_GREEN  = '#95FD99'
BLACK        = '#000000'
WHITE        = '#FFFFFF'
LIGHT_PURPLE = '#E8D0FF'
LIGHT_ORANGE = '#FAE0C3'

In [3]:
class Experience(NamedTuple):
    episode: int
    state: np.ndarray
    action: int
    reward: float
    next_state: np.ndarray
    done: bool

In [4]:
def running_average(data, window_length: int = 50):
    overlap_length = np.concatenate((np.arange(1, window_length), window_length * np.ones(len(data))))
    window = np.ones(window_length)
    averages = (np.convolve(data, window) / overlap_length)[:-(window_length-1)]
    assert len(averages) == len(data)
    return averages

In [5]:
def decide_random(probability):
    return np.random.binomial(n=1, p=probability) == 1

In [32]:

import random


class Maze:

    # Actions
    STAY       = 0
    MOVE_LEFT  = 1
    MOVE_RIGHT = 2
    MOVE_UP    = 3
    MOVE_DOWN  = 4

    # Give names to actions
    actions_names = {
        STAY: "stay",
        MOVE_LEFT: "move left",
        MOVE_RIGHT: "move right",
        MOVE_UP: "move up",
        MOVE_DOWN: "move down"
    }

    # Reward values
    
    STEP_REWARD = 0         #TODO
    KEY_REWARD = 1
    GOAL_REWARD = 1         #TODO
    IMPOSSIBLE_REWARD =  0  #TODO
    LOSS_REWARD = -50      #TODO


    def __init__(self, 
                 maze, 
                 horizon: int | None = None,
                 allow_minotaur_stay: bool = False, 
                 expected_life: float = 1, 
                 minotaur_chase: bool = False, 
                 keys: bool = False
                 ):
        """ Constructor of the environment Maze.
        """
        self.maze = maze
        self.horizon = horizon
        self.temp_position = (-1, -1)
        self.poise_probability = (1/expected_life)
        self.minotaur_chase = minotaur_chase
        self.keys = keys
        self.finite_horizon = horizon
        #self.key_position             = (0,7)
        self.start_position           = ((0,0), (6,5))
        self.allow_minotaur_stay      = allow_minotaur_stay
        self.actions                  = self.__actions()
        self.states, self.map         = self.__states()
        self.n_actions                = len(self.actions)
        self.n_states                 = len(self.states)
        self._initial_state = ((0,0), (6,5), "NOKEYS")
        
        self.moves_cache = dict()
        assert not (self.poise_probability > 0 and self.finite_horizon is not None)    # poison only for discounted MDPs

        

    def __actions(self):
        actions = dict()
        actions[self.STAY]       = (0, 0)
        actions[self.MOVE_LEFT]  = (0, -1)
        actions[self.MOVE_RIGHT] = (0, 1)
        actions[self.MOVE_UP]    = (-1, 0)
        actions[self.MOVE_DOWN]  = (1, 0)
        return actions

    def __states(self):
        
        states = dict()
        map = dict()
        s = 0
        for i in range(self.maze.shape[0]):
            for j in range(self.maze.shape[1]):
                if self.maze[i,j] != 1:  # Agent cannot be in a wall
                    for k in range(self.maze.shape[0]):
                        for l in range(self.maze.shape[1]):
                            # Remove the condition self.maze[k,l] != 1
                            for has_key in ["NOKEYS", "KEYS"]:
                                states[s] = ((i, j), (k, l), has_key)
                                map[((i, j), (k, l), has_key)] = s
                                s += 1
        states[s] = ((self.temp_position), (self.temp_position), "WIN")
        map[((self.temp_position), (self.temp_position), "WIN")] = s
        s += 1 
        states[s] = ((self.temp_position), (self.temp_position), "EATEN")
        map[((self.temp_position), (self.temp_position), "EATEN")] = s
        s += 1 

        return states, map

    def get_position(self, agent_position, action, is_player): 
        x, y = agent_position
        if action == self.MOVE_UP:
            x -= 1
        elif action == self.MOVE_DOWN:
            x += 1
        elif action == self.MOVE_LEFT:
            y -= 1
        elif action == self.MOVE_RIGHT:
            y += 1
        elif action == self.STAY:
            pass
        else:
            raise ValueError(f"Invalid move {action}")
        return (x, y)
    
    def __move_minotaur(self, state, actions_minotaur, only_valid = True): 
        minotaur_actions = []
        
        # Check if the current state is terminal
        if self.terminal_state(state):
            minotaur_actions.append(self.STAY)
        elif not self.minotaur_chase: 
            if np.random.rand() < 0.35: 
                minotaur_actions = []
                mx, my = self.states[state][1][0], self.states[state][1][1]  # Minotaur's position
                ax, ay = self.states[state][0][0], self.states[state][0][1]  # Player's position
                
                # Determine the chase direction
                if ax > mx:
                    minotaur_actions.append(self.MOVE_RIGHT)
                elif ax < mx:
                    minotaur_actions.append(self.MOVE_LEFT)
                if ay > my:
                    minotaur_actions.append(self.MOVE_DOWN)
                elif ay < my:
                    minotaur_actions.append(self.MOVE_UP)
                
                # If no specific direction is determined, add all potential moves
                if not minotaur_actions:
                    minotaur_actions = [self.MOVE_UP, self.MOVE_DOWN, self.MOVE_LEFT, self.MOVE_RIGHT]
            
            # Validate actions to ensure they do not result in the Minotaur leaving the environment
            valid_actions = []
            for action in minotaur_actions:
                # Calculate the new position after applying the action
                new_position = tuple(map(sum, zip((mx, my), self.actions[action])))
                
                # Check if the new position is within the maze boundaries
                if all(0 <= new_position[i] < self.maze.shape[i] for i in range(len(new_position))):
                    valid_actions.append(action)
            
            minotaur_actions = valid_actions
        else: 
            # Use provided actions, validating them as well
            for action in actions_minotaur:
                
                new_state = tuple(map(sum, zip(state[1], self.actions[action])))
                if all(0 <= new_state[i] < self.maze.shape[i] for i in range(len(new_state))):
                    minotaur_actions.append(action)

        # Choose a random valid action
        if minotaur_actions:  # Ensure there are valid actions before choosing
            random_number = random.randint(0, len(minotaur_actions) - 1)
            return minotaur_actions[random_number]
        else:
            return self.STAY 



    def reward(self, state, action):
        assert action in self.possible_actions(state)

        next_state = self._next_state(state, action)
        reward = self._reward(state, next_state)

        return reward
    
    def _reward(self, state, next_state):
        _, _, progress = state
        _, _, next_progress = next_state

        # terminal state (absorbing): nothing happens
        if self.terminal_state(state):
            print("terminal_state")
            reward = 0
            
        # main objective: maximize probability of exiting alive before the time expires
        # <=> maximize reward by collecting the keys and exit reward
        # => positive reward for collecting keys and exiting alive
        elif next_progress != "EATEN" and \
                progress == "NOKEYS" and next_progress == "KEYS":
            reward = self.KEY_REWARD
        elif next_progress !=  "EATEN" and \
                progress == "KEYS" and next_progress == "WIN":
            print("goal reward")
            reward = self.GOAL_REWARD
        # additional objective: don't waste time while you are alive
        # <=> minimize time to exit <=> maximize negative time to exit
        # notice that for discounted MDPs, the step penalty is set to 0 in the constructor
        else:
            reward = self.STEP_REWARD

        return reward
    
    def _next_state(self, state, action, minotaur_move = None):
        
        player_position, minotaur_position, progress = state

        if self.terminal_state(state):
            print("terminal state")
            pass    # state stays the same (absorbing state)
        else:
            if minotaur_move is None:
                actions_minotaur = [self.MOVE_DOWN, self.MOVE_UP, self.MOVE_RIGHT, self.MOVE_LEFT] # Possible moves for the Minotaur
                if self.allow_minotaur_stay:
                    actions_minotaur.append(self.STAY)
                action_minotaur = self.__move_minotaur(state, actions_minotaur)
                #chase = self.minotaur_chase and np.random.rand() < 0.35
                #valid_minotaur_moves = self._valid_minotaur_moves(state, chase=chase)
                #minotaur_move = self._rng.choice(valid_minotaur_moves)

            next_player_position = self.get_position(player_position, action, True)
            next_minotaur_position = self.get_position(minotaur_position, action_minotaur, False)
            if next_player_position == (6,5): 
                print("found exit")
                print(progress)
            if next_player_position == next_minotaur_position:
                #print("progress EATEN")
                state = (self.temp_position, self.temp_position, "EATEN")
            elif progress == "KEYS" and next_player_position == (-1,-1):
                state = (self.temp_position, self.temp_position, "WIN")
            elif progress == "KEYS" and next_player_position == (6,5):
                print("progress win!")
                state = (self.temp_position, self.temp_position, "WIN")
            elif progress == "NOKEYS" and next_player_position == (0,7):
                print("progress KEYS")
                print("next state eaten: ", (next_player_position == next_minotaur_position))
                if not (next_player_position == next_minotaur_position): 
                    print("next player position: ", next_player_position)
                    print("next minotaur position: ", next_minotaur_position )
                state = (next_player_position, next_minotaur_position, "KEYS")
            else:
                state = (next_player_position, next_minotaur_position, progress)

        return state
    
    def terminal_state(self, state):
        _, _, progress = state
        if progress in ["EATEN", "WIN"]: 
            terminal = True
        else: 
            terminal = False
            
        return terminal
    
    
    def possible_actions(self, state):
        """
        build a dictionary of possible actions at each state
        """
        possible_actions = []
        if self.terminal_state(state):
            possible_actions = [self.STAY]
        else: 
            player_position, _, _ = state
            for a, a_delta in self.actions.items():
                new_state = tuple(map(sum, zip(player_position, a_delta)))
                if all(0 <= new_state[i] < self.maze.shape[i] for i in range(len(new_state))):
                    if self.maze[new_state] != 1: 
                        possible_actions.append(a)

        return possible_actions
    
    def _horizon_reached(self):
        # random time horizon geometrically distributed
        if self.poise_probability > 0:
            horizon_reached = decide_random(self.poise_probability)
        else:
            print("need _nsteps")
            horizon_reached = self._n_steps >= self.horizon if self.finite_horizon is not None else False
        return horizon_reached

    
    def reset(self):
        self._current_state = self._initial_state
        self._n_steps = 0
        return self._current_state
    
    def step(self, action: int):
        # update state
        previous_state = self._current_state
        new_state = self._next_state(previous_state, action)
        
        
        self._current_state = new_state
        # calculate reward
        reward = self.reward(previous_state, action)
        ax, _, progress = self._current_state
        # check end of episode
        self._n_steps += 1
        
        if reward != 0: 
            _, _, reward_prev = previous_state
            print("progress previous state: ", reward_prev)
            print("progress current state: ", progress)
            print("reward current state: ", reward)
        #if ax == self.temp_position: 
            #print("out of field")
        if progress in ["WIN", "EATEN"]: 
            if progress =="KEYS": 
                print("even has keys")
            #print("should be done")
        if self._horizon_reached(): 
            if progress =="KEYS": 
                print("horizon reached with  keys")
        if self.terminal_state(self._current_state): 
            if progress != "EATEN": 
                print("won once")
            # print("terminal_state with progress: ", progress)
        done = self._horizon_reached() or self.terminal_state(self._current_state)

        win = (progress == "WIN")
        
        return self._current_state, reward, done, win
    

    def simulate(self, start, policy, method):
        if method not in methods:
            error = 'ERROR: the argument method must be in {}'.format(methods)
            raise NameError(error)

        path = list()
        
            
        if method == "QLearning":
            t = 0  # Initialize current time
            s = self.map[start]
            path.append(start)
            while True:
                # Include poison probability
                if np.random.rand() < 1/50:
                    path.append('Dead')
                    break

                a = policy[s]  # Action from Q-learning policy
                next_states = self.__move(s, a)
                random_number = random.randint(0, len(next_states) - 1)
                next_s = next_states[random_number]
                path.append(next_s)
                if next_s in ['Win', 'Eaten', 'Dead']:
                    break
                s = self.map[next_s]
                t += 1
            horizon = t

        return [path, horizon] 

    
    
    

In [20]:
class QLearning(): 
    def __init__(
            self,
            env, 
            epsilon: float | str,
            discount: float, 
            learning_rate: float | str,
            alpha: float | None = None,
            epsilon_decay_duration: int | None = None,
            delta: float | None = None,
            q_init: float = 0,
            seed: int | None = None
    ):
        self.env = env
        self.discount = discount
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.epsilon = epsilon
        self.epsilon_decay_episodes = epsilon_decay_duration
        self.delta = delta
        self.alpha = alpha
        self.q_init = q_init
        self._exploration_decay = self.delta is not None
        print(self.env.states)
        self._q = [
            (self.q_init if not env.terminal_state(env.states[state]) else 0) *
            np.ones(len(self.env.possible_actions(env.states[state])))
            for state in self.env.states
        ]
        #print(self.env.possible_actions)
        self._n = [np.zeros(len(self.env.possible_actions(env.states[state]))) for state in self.env.states]

        self._last_experience = None
        
    def q(self, state, action):
        s = self.env.map[state]
        a = self.possible_actions(state, action)
        q = self._q[s][a]
        return q
    
    def _action_index(self, state, action):
        #print("original action is: ", action)
        valid_actions = self.env.possible_actions(state)
        #print("valid actions: ", valid_actions)
        
        # Find the index of the action in the valid_actions list
        try:
            action_index = valid_actions.index(action)
            #print("Action index is: ", action_index)
            return action_index
        except ValueError:
            print("Action not found in valid actions!")
            return -1 
    
    def v(self, state):
        s = self.env.map[state]
        v = max(self._q[s])
        return v
    def compute_action(
            self,
            state,
            episode: int | None = None,
            explore: bool = True,
    ):

        assert not (explore and episode is None)
        valid_actions = self.env.possible_actions(state)
        # Compute epsilon according to exploration strategy
        if explore:
            epsilon = self.epsilon
        else:
            epsilon = None

        # Epsilon-greedy policy (or greedy policy if explore=False)
        if explore and decide_random(epsilon):
            random_number = random.randint(0, len(valid_actions) - 1)
            action = valid_actions[random_number]
            
        else:
            s = self.env.map[state]
            v = self.v(state)
            # random_number = random.randint(0, len(valid_actions) - 1)
            a = random.choice(np.asarray(self._q[s] == v).nonzero()[0])      # random among those with max Q-value
            action = valid_actions[a]

        return action
    
    def record_experience(self, experience) -> None:
        self._last_experience = experience
        
    def update(self) -> dict:
        # Unpack last experience
        state = self._last_experience.state
        action = self._last_experience.action
        reward = self._last_experience.reward
        next_state = self._last_experience.next_state

        # Get indices
        s = self.env.map[state]

        a = self._action_index(state, action)
        s_next = self.env.map[next_state]
        # Update Q-function
        self._n[s][a] += 1
        step_size = 1 / (self._n[s][a] ** self.alpha)
        self._q[s][a] += step_size * (reward + self.discount * max(self._q[s_next]) - self._q[s][a])

        return {}

    def train(
            self,
            n_episodes: int,
            train: bool = True
    ) -> dict:
        
        stats = defaultdict(list)
        episodes = trange(1, n_episodes + 1, desc='Episode: ', leave=True)

        for episode in episodes:
            # Reset environment data and initialize variables
            done = False
            state = self.env.reset()
            episode_reward = 0
            episode_length = 0

            # Run episode
            while not done:
                # Interact with the environment
                action = self.compute_action(state=state, episode=episode, explore=train)
                next_state, reward, done, win = self.env.step(action)
                # Update policy
                if train:
                    experience = Experience(
                        episode=episode,
                        state=state,
                        action=action,
                        reward=reward,
                        next_state=next_state,
                        done=done
                    )
                    self.record_experience(experience)
                    update_stats = self.update()

                    # Update stats
                    for k, v in update_stats.items():
                        if isinstance(v, list):
                            stats[k].extend(v)
                        else:
                            stats[k].append(v)
                episode_reward += reward
                episode_length += 1

                # Update state
                state = next_state

            # Update stats
            stats["episode_reward"].append(episode_reward)
            stats["episode_length"].append(episode_length)

            # Show progress
            avg_episode_length = running_average(stats["episode_length"])[-1]
            avg_episode_reward = running_average(stats["episode_reward"])[-1]
            episodes.set_description(
                f"Episode {episode} - "
                f"Reward: {episode_reward:.1f} - "
                f"Length: {episode_length} - "
                f"Avg reward: {avg_episode_reward:.1f} - "
                f"Avg length: {avg_episode_length:.1f}"
            )

        return stats

In [21]:
def minotaur_maze_exit_probability(environment, agent):
    n_episodes = 10
    n_wins = 0
    for episode in range(1, n_episodes+1):
        done = False
        time_step = 0
        # environment.seed(episode)
        state = environment.reset()
        while not done:
            action = agent.compute_action(state=state, episode=episode, explore=False)
            state, _, done, won = environment.step(action)
            time_step += 1
            if won: 
                print("WON")
            n_wins += 1 if won else 0
    exit_probability = n_wins / n_episodes
    return exit_probability

In [34]:
NUM_EPISODES = 5000
maze = np.array([
    [0, 0, 1, 0, 0, 0, 0, 0],
    [0, 0, 1, 0, 0, 1, 0, 0],
    [0, 0, 1, 0, 0, 1, 1, 1],
    [0, 0, 1, 0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 1, 1, 1, 1, 1, 1, 0],
    [0, 0, 0, 0, 1, 2, 0, 0]])

# Initialize the environment
restart_state = ((0,0), (6,5))
expected_life = 50 
gamma = 1 / (1-expected_life)
epsilon = 0.1
env = Maze(maze, expected_life=expected_life, minotaur_chase=True, keys=True)
agent_q_learning = QLearning(
        env=env,
        learning_rate="decay",
        discount=gamma,
        alpha=0.55,
        epsilon=0.2,
        delta=None,
        q_init=0.01,
    )
agent_q_learning.train(NUM_EPISODES)

gamma = 0.1

record_interval = 100



{0: ((0, 0), (0, 0), 'NOKEYS'), 1: ((0, 0), (0, 0), 'KEYS'), 2: ((0, 0), (0, 1), 'NOKEYS'), 3: ((0, 0), (0, 1), 'KEYS'), 4: ((0, 0), (0, 2), 'NOKEYS'), 5: ((0, 0), (0, 2), 'KEYS'), 6: ((0, 0), (0, 3), 'NOKEYS'), 7: ((0, 0), (0, 3), 'KEYS'), 8: ((0, 0), (0, 4), 'NOKEYS'), 9: ((0, 0), (0, 4), 'KEYS'), 10: ((0, 0), (0, 5), 'NOKEYS'), 11: ((0, 0), (0, 5), 'KEYS'), 12: ((0, 0), (0, 6), 'NOKEYS'), 13: ((0, 0), (0, 6), 'KEYS'), 14: ((0, 0), (0, 7), 'NOKEYS'), 15: ((0, 0), (0, 7), 'KEYS'), 16: ((0, 0), (1, 0), 'NOKEYS'), 17: ((0, 0), (1, 0), 'KEYS'), 18: ((0, 0), (1, 1), 'NOKEYS'), 19: ((0, 0), (1, 1), 'KEYS'), 20: ((0, 0), (1, 2), 'NOKEYS'), 21: ((0, 0), (1, 2), 'KEYS'), 22: ((0, 0), (1, 3), 'NOKEYS'), 23: ((0, 0), (1, 3), 'KEYS'), 24: ((0, 0), (1, 4), 'NOKEYS'), 25: ((0, 0), (1, 4), 'KEYS'), 26: ((0, 0), (1, 5), 'NOKEYS'), 27: ((0, 0), (1, 5), 'KEYS'), 28: ((0, 0), (1, 6), 'NOKEYS'), 29: ((0, 0), (1, 6), 'KEYS'), 30: ((0, 0), (1, 7), 'NOKEYS'), 31: ((0, 0), (1, 7), 'KEYS'), 32: ((0, 0), (2, 

Episode 112 - Reward: 0.0 - Length: 17 - Avg reward: 0.0 - Avg length: 38.5:   1%|▏         | 66/5000 [00:00<00:07, 651.65it/s] 

progress KEYS
next state eaten:  False
next player position:  (0, 7)
next minotaur position:  (3, 1)
progress KEYS
next state eaten:  False
next player position:  (0, 7)
next minotaur position:  (2, 0)
progress previous state:  NOKEYS
progress current state:  KEYS
reward current state:  1


Episode 283 - Reward: 0.0 - Length: 3 - Avg reward: 0.0 - Avg length: 36.9:   5%|▌         | 260/5000 [00:00<00:08, 541.85it/s]  

progress KEYS
next state eaten:  False
next player position:  (0, 7)
next minotaur position:  (3, 1)
progress KEYS
next state eaten:  False
next player position:  (0, 7)
next minotaur position:  (4, 2)
progress previous state:  NOKEYS
progress current state:  KEYS
reward current state:  1
horizon reached with  keys


Episode 1011 - Reward: 0.0 - Length: 67 - Avg reward: 0.0 - Avg length: 44.1:  20%|██        | 1006/5000 [00:01<00:08, 456.45it/s]

progress KEYS
next state eaten:  False
next player position:  (0, 7)
next minotaur position:  (6, 6)
progress KEYS
next state eaten:  False
next player position:  (0, 7)
next minotaur position:  (6, 6)
progress previous state:  NOKEYS
progress current state:  KEYS
reward current state:  1
horizon reached with  keys
horizon reached with  keys
horizon reached with  keys
horizon reached with  keys


Episode 2489 - Reward: 0.0 - Length: 36 - Avg reward: 0.0 - Avg length: 42.0:  49%|████▊     | 2437/5000 [00:04<00:05, 500.01it/s] 

found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS
found exit
NOKEYS


Episode 5000 - Reward: 0.0 - Length: 130 - Avg reward: 0.0 - Avg length: 43.6: 100%|██████████| 5000/5000 [00:09<00:00, 538.53it/s]


In [None]:
exit_probability = minotaur_maze_exit_probability(env, agent_q_learning)



In [None]:
print(f"P('exit alive'|'poisoned')={exit_probability}")

P('exit alive'|'poisoned')=0.0


In [None]:
print(env.map)

{((0, 0), (0, 0), 'NOKEYS'): 0, ((0, 0), (0, 0), 'KEYS'): 1, ((0, 0), (0, 1), 'NOKEYS'): 2, ((0, 0), (0, 1), 'KEYS'): 3, ((0, 0), (0, 2), 'NOKEYS'): 4, ((0, 0), (0, 2), 'KEYS'): 5, ((0, 0), (0, 3), 'NOKEYS'): 6, ((0, 0), (0, 3), 'KEYS'): 7, ((0, 0), (0, 4), 'NOKEYS'): 8, ((0, 0), (0, 4), 'KEYS'): 9, ((0, 0), (0, 5), 'NOKEYS'): 10, ((0, 0), (0, 5), 'KEYS'): 11, ((0, 0), (0, 6), 'NOKEYS'): 12, ((0, 0), (0, 6), 'KEYS'): 13, ((0, 0), (0, 7), 'NOKEYS'): 14, ((0, 0), (0, 7), 'KEYS'): 15, ((0, 0), (1, 0), 'NOKEYS'): 16, ((0, 0), (1, 0), 'KEYS'): 17, ((0, 0), (1, 1), 'NOKEYS'): 18, ((0, 0), (1, 1), 'KEYS'): 19, ((0, 0), (1, 2), 'NOKEYS'): 20, ((0, 0), (1, 2), 'KEYS'): 21, ((0, 0), (1, 3), 'NOKEYS'): 22, ((0, 0), (1, 3), 'KEYS'): 23, ((0, 0), (1, 4), 'NOKEYS'): 24, ((0, 0), (1, 4), 'KEYS'): 25, ((0, 0), (1, 5), 'NOKEYS'): 26, ((0, 0), (1, 5), 'KEYS'): 27, ((0, 0), (1, 6), 'NOKEYS'): 28, ((0, 0), (1, 6), 'KEYS'): 29, ((0, 0), (1, 7), 'NOKEYS'): 30, ((0, 0), (1, 7), 'KEYS'): 31, ((0, 0), (2, 0), 