In [1]:
"""Q Learning to solve a simple world model

Example "world" displayed below
---------------------------------
|         |          |          |
|  Start  |          |  Hole    |
|         |          |          |
---------------------------------
|         |          |          |
|         |          |  Goal    |
|         |          |          |
---------------------------------

"""

import numpy as np
import argparse
import os
import time
from   termcolor   import colored
from   collections import deque

In [2]:
def is_iterable( x ):
    """Report whether ``iter(x)`` succeeds.

    Returns True when an iterator can be obtained from ``x`` and False when
    the attempt raises any ``Exception``.
    """
    try:
        iter(x)
    except Exception:
        return False
    else:
        return True

In [3]:
class QWorld( ):
    def __init__( self , width=3 , height=2 , s0=0 , goal_states=[] , hole_states=[] , 
                  wall_states=[] , gamma=0.9 , epsilon=0.9 , epsilon_decay=0.9 , 
                  epsilon_min=0.1 ):
        
        # 4 actions
        # 0 - Left, 1 - Down, 2 - Right, 3 - Up
        self.col = 4

        # shape of "maze"
        self.width  = width  if width*height >= 2 else 3
        self.height = height if width*height >= 2 else 2
        
        # 9 states
        self.row = self.width * self.height
        
        # starting state
        s0      = self.convert_to_state( s0 )
        self.s0 = s0 if s0 != -1 else 0
        
        # Location(s) of the goal(s) -- assure at least one
        goal_states      = [ self.convert_to_state(g) for g in goal_states ]
        self.goal_states = [ g for g in goal_states if g != self.s0 and g != -1 ]
        self.goal_states = self.goal_states if self.goal_states else [ self.row - 1 ]
        
        # Location(s) of the hole(s) -- ensure hole != goal
        hole_states      = [ self.convert_to_state(h) for h in hole_states ]
        self.hole_states = [ h for h in hole_states 
                               if  h != s0 
                               and h != -1 
                               and h not in self.goal_states ]
        
        # Location(s) of the wall(s) -- ensure wall isn't at hole/goal/start
        wall_states      = [ self.convert_to_state(w) for w in wall_states ]
        self.wall_states = [ w for w in wall_states
                               if  w != s0
                               and w != -1
                               and w not in self.goal_states
                               and w not in self.hole_states ]

        # setup the environment
        self.q_table = np.zeros( [ self.row , self.col ] )
        self.init_transition_table( )
        self.init_reward_table( )

        # discount factor
        self.gamma = gamma if gamma >= 0 and gamma <= 1 else 0.9

        # 90% exploration, 10% exploitation
        self.epsilon = epsilon if epsilon >= 0 and epsilon <= 1 else 0.9
        
        # exploration decays by this factor every episode
        self.epsilon_decay = epsilon_decay if epsilon_decay >= 0 and epsilon_decay <= 1 else 0.9
        
        # in the long run, 10% exploration, 90% exploitation
        self.epsilon_min = epsilon_min if epsilon_min >= 0 and epsilon_min <= self.epsilon else min(self.epsilon, 0.1)

        # reset the environment
        self.reset( )
        self.is_explore = True
        
        
        
    def is_valid_state( self , s ):
        return s >= 0 and s < self.row
        
        
        
    def convert_to_state( self , s ):
        if is_iterable( s ) and len( s ) == 2:
            s = s[1]*self.width + s[0]
        if type( s ) is not int:
            s = -1
        if not self.is_valid_state( s ):
            s = -1
        return s
    
    
    
    def get_state_left( self , s ):
        ls = s - 1
        return ls if s % self.width > 0 else -1
    
    
    
    def get_state_down( self , s ):
        ds = s + self.width
        return ds if ds < self.row else -1
    
    
    
    def get_state_right( self , s ):
        rs = s + 1
        return rs if rs % self.width > 0 else -1
    
    
    
    def get_state_up( self , s ):
        us = s - self.width
        return us if us >= 0 else -1
        
        
        
    def reset_tables( self ):
        self.q_table = np.zeros( self.q_table.shape )
        self.init_transition_table( )
        self.init_reward_table( )
        
        
        
    # if s0 is an int, assume it is the desired start state
    # if s0 is a tuple, then it is of the form (x,y) for the position of the starting state
    def change_s0( self , s0 ):
        s0 = self.convert_to_state( s0 )
        if s0 != -1:
            s0      = self.s0 if s0 in self.goal_states + self.hole_states + self.wall_states else s0
            self.s0 = s0 if self.is_valid_state( s0 ) else self.s0
            self.reset_tables( )
            self.reset( )
        
        
    
    def change_grid_dimensions( self , width , height ):
        # update metadata values
        w            = self.width
        h            = self.height
        self.width   = width  if w*h >= 2 else self.width
        self.height  = height if w*h >= 2 else self.height
        self.row     = self.width * self.height
        update_state = lambda x: (x//w)*self.width + (x%w)
        
        # reposition goal and hole states to maintain same coordinates
        self.goal_states = [ update_state(g) for g in self.goal_states if update_state(g) < self.row ]
        self.hole_states = [ update_state(h) for h in self.hole_states if update_state(h) < self.row ]
        self.wall_states = [ update_state(w) for w in self.wall_states if update_state(w) < self.row ]
        
        # update initial state to maintain same coordinates
        self.s0 = update_state( self.s0 )
        if self.s0 >= self.row:
            self.s0 = 0
            self.goal_states = [ g for g in self.goal_states if g != self.s0 ]
            self.hole_states = [ h for h in self.hole_states if h != self.s0 ]
            self.wall_states = [ w for w in self.wall_states if w != self.s0 ]
        
        # resize and reset the tables
        self.q_table = np.zeros( [ self.row , self.col ] )
        self.reset_tables( )
        self.reset( )
        
        
        
    def add_goal_state( self , s ):
        g = self.convert_to_state( s )
        g = -1 if g == self.s0 or g in self.hole_states + self.goal_states + self.wall_states else g
        if g != -1:
            self.goal_states.append( g )
            self.reset_tables( )
            self.reset( )
            
            
            
    def add_hole_state( self , s ):
        h = self.convert_to_state( s )
        h = -1 if h == self.s0 or h in self.goal_states + self.hole_states + self.wall_states else h
        if h != -1:
            self.hole_states.append( h )
            self.reset_tables( )
            self.reset( )
            
            
            
    def add_wall_state( self , s ):
        w = self.convert_to_state( s )
        w = -1 if w == self.s0 or w in self.goal_states + self.hole_states + self.wall_states else w
        if w != -1:
            self.wall_states.append( w )
            self.reset_tables( )
            self.reset( )
        
             
            
    def remove_goal_state( self , s ):
        s = self.convert_to_state( s )
        # can't have no goal states
        if len(self.goal_states) > 1 and s != -1:
            self.goal_states = [ g for g in self.goal_states if g != s]
            self.reset_tables( )
            self.reset( )
            
                
                
    def remove_hole_state( self , s ):
        s = self.convert_to_state( s )
        if s != -1:
            self.hole_states = [ h for h in self.hole_states if h != s ]
            self.reset_tables( )
            self.reset( )
            
            
            
    def remove_wall_state( self , s ):
        s = self.convert_to_state( s )
        if s != -1:
            self.wall_states = [ w for w in self.wall_states if w != s ]
            self.reset_tables( )
            self.reset( )
                
                
            
    def replace_goal_states_list( self , s ):
        if not is_iterable( s ):
            s = list( s )
        if is_iterable( s ) and len( s ) > 0:
            new_goal_states = list( )
            for state in s:
                x = self.convert_to_state( state )
                if x != -1 and x != self.s0 and x not in self.hole_states + self.wall_states:
                    new_goal_states.append( x )
            if len( new_goal_states ) > 0:
                self.goal_states = new_goal_states
                self.reset_tables( )
                self.reset( )
                
                
                
    def replace_hole_states_list( self , s ):
        if not is_iterable( s ):
            s = list( s )
        if is_iterable( s ) and len( s ) > 0:
            self.hole_states = list( )
            for state in s:
                x = self.convert_to_state( state )
                if x != -1 and x != self.s0 and x not in self.goal_states + self.wall_states:
                    self.hole_states.append( x )
            self.reset_tables( )
            self.reset( )
            
            
            
    def replace_wall_states_list( self , s ):
        if not is_iterable( s ):
            s = list( s )
        if is_iterable( s ) and len( s ) > 0:
            self.wall_states = list( )
            for state in s:
                x = self.convert_to_state( state )
                if x != -1 and x != self.s0 and x not in self.goal_states + self.hole_states:
                    self.wall_states.append( x )
            self.reset_tables( )
            self.reset( )
       
    
    
    def change_epsilon( self , epsilon ):
        if epsilon <= 1 and epsilon >= 0 and self.epsilon_min <= epsilon:
            self.epsilon = epsilon
            
    
    
    def change_epsilon_min( self , epsilon_min ):
        if epsilon_min <= 1 and epsilon_min >= 0 and epsilon_min <= self.epsilon:
            self.epsilon_min = epsilon_min
        
    
            
    # start of episode
    def reset( self ):
        self.state = self.s0
        return self.state

    
    
    # agent wins when the goal is reached
    def is_in_win_state( self ):
        return self.state in self.goal_states
    
    
    
    def update_cell_rewards( self , s , r ):
        ls = self.get_state_left( s )
        ds = self.get_state_down( s )
        rs = self.get_state_right( s )
        us = self.get_state_up( s )
        
        # When Cell Left Moves Right
        self.reward_table[ls,2] = float(r) if ls != -1 else self.reward_table[ls,2]
        
        # When Cell Above Moves Down
        self.reward_table[us,1] = float(r) if us != -1 else self.reward_table[us,1]
        
        # When Cell Below Moves Up
        self.reward_table[ds,3] = float(r) if ds != -1 else self.reward_table[ds,3]
        
        # When Cell Right Moves Left
        self.reward_table[rs,0] = float(r) if rs != -1 else self.reward_table[rs,0]
            
            

    def init_reward_table( self ):
        """
        0 - Left, 1 - Down, 2 - Right, 3 - Up
       -100 = Worst Score Possible (fell in a Hole)
       100  = Best Score Possible (Reached the Goal)
        """
        self.reward_table = np.zeros([self.row, self.col])
        for g in self.goal_states:
            self.update_cell_rewards( g , 100. )
        for h in self.hole_states:
            self.update_cell_rewards( h , -100. )

            

    def init_transition_table(self):
        """
        0 - Left, 1 - Down, 2 - Right, 3 - Up
        -------------
        | 0 | 1 | 2 |
        -------------
        | 3 | 4 | 5 |
        -------------
        | 6 | 7 | 8 |
        -------------
        """
        self.transition_table = np.zeros([self.row, self.col], dtype=int)
        for s in range(self.row):
            if s in self.goal_states + self.hole_states + self.wall_states:
                self.transition_table[s,:] = s
            else:
                ls = self.get_state_left( s )
                ds = self.get_state_down( s )
                rs = self.get_state_right( s )
                us = self.get_state_up( s )
                self.transition_table[s,0] = ls if ls != -1 and ls not in self.wall_states else s
                self.transition_table[s,1] = ds if ds != -1 and ds not in self.wall_states else s
                self.transition_table[s,2] = rs if rs != -1 and rs not in self.wall_states else s
                self.transition_table[s,3] = us if us != -1 and us not in self.wall_states else s
        

        
    # execute the action on the environment
    def step( self , action ):
        # determine the next_state given state and action
        next_state = self.transition_table[self.state, action]
        
        # done is True if next_state is Goal or Hole
        done = next_state in self.goal_states or next_state in self.hole_states
        
        # reward given the state and action
        reward = self.reward_table[self.state, action]
        
        # the enviroment is now in new state 
        self.state = next_state
        return next_state, reward, done

    
    
    # determine the next action
    def act( self ):
        # 0 - Left, 1 - Down, 2 - Right, 3 - Up
        # valid move options for current state
        options = [ i for i in range(4) if self.transition_table[self.state,i] != self.state ] 
        
        # action is from exploration
        if np.random.rand( ) <= self.epsilon:
            # explore - do random action
            self.is_explore = True
            return np.random.choice( options )

        # or action is from exploitation
        # exploit - choose action with max Q-value (randomly select one if multiple are tied)
        self.is_explore = False
        
        # don't exploit into wall, don't get stuck in infinite loop when max is tied in opposite directions
        max_reward = max( self.q_table[self.state,options] )
        options    = [ o for o in options if self.q_table[self.state,o] == max_reward ]
        if len( options ) > 1:
            return np.random.choice(options)  # if multiple argmax, then pick a random one
        else:
            return max(options, key=lambda x: self.q_table[self.state,x])

        

    # Q-Learning - update the Q Table using Q(s, a)
    def update_q_table(self, state, action, reward, next_state):
        # Q(s, a) = reward + gamma * max_a' Q(s', a')
        q_value = reward + self.gamma * np.amax(self.q_table[next_state])
        self.q_table[state, action] = q_value


        
    # UI to dump Q Table contents
    def print_q_table(self):
        print("Q-Table (Epsilon: %0.2f)" % self.epsilon)
        print(self.q_table)

        

    # update Exploration-Exploitation mix
    def update_epsilon(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

            

    # UI to display agent moving on the grid
    def print_cell(self, row=0):
        print("")
        for i in range(4*self.width+1):
            j = i - 2
            # if center of cell, print symbol
            if j % 4 == 0:
                cellnum = row*self.width + j//4
                # if we are printing a G
                if cellnum in self.goal_states:
                    # red underlined G if state is here
                    if self.state == cellnum:
                        marker = "\033[4mG\033[0m"
                        color  = 'red'
                    # blue G if state is not here
                    else:
                        marker = 'G'
                        color  = 'blue'
                # if we are printing an H
                elif cellnum in self.hole_states:
                    # Red underlined H if state is here
                    if self.state == cellnum:
                        marker = "\033[4mH\033[0m"
                        color  = 'red'
                    # Blue H if state is not here
                    else:
                        marker = 'H'
                        color  = 'blue'
                # If we are printing a Wall cell
                elif cellnum in self.wall_states:
                    marker = 'W'
                    color  = 'black'
                # if printing current state and not goal/hole, print red x
                elif self.state == cellnum:
                    marker = 'x'
                    color  = 'red'
                # printing an empty cell
                else:
                    marker = ' ' #str(row*self.width + j//4)
                    color  = 'blue'
                # print the appropriate cell character with the appropriate color
                if color is not 'black':
                    print(colored(marker, color), end='')
                else:
                    print(marker, end='')
            # Print the divider between cells
            elif i % 4 == 0:
                    print('|', end='')
            # Print cell padding
            else:
                print(' ', end='')
        print("")


        
    # UI to display mode and action of agent
    def print_world(self, action, step):
        actions = { -1: "(Start)", 0: "(Left)", 1: "(Down)", 2: "(Right)", 3: "(Up)" }
        explore = "Explore" if self.is_explore else "Exploit"
        explore = 'Initialize' if action == -1 else explore
        divider = '-' * (4*self.width+1)
        print("Step", step, ":", explore, actions[action])
        print(divider, end='')
        for r in range(self.height):
            self.print_cell(row=r)
            print(divider, end='')
        print("")


In [4]:
# UI to display episode count
def print_episode( episode , delay=1 ):
    """Print a banner with the episode number, then sleep for ``delay`` seconds."""
    rule = '=' * 13
    print( rule )
    print( "Episode " , episode )
    print( rule )
    time.sleep( delay )

    

# UI to display the world, delay of 1 sec for ease of understanding
def print_status( q_world , done , step , action , delay=1 ):
    """Show the world after a step, then sleep.

    The world is always printed.  When ``done`` is true the Q table and an
    end-of-episode marker are printed as well, and the pause is doubled.
    """
    q_world.print_world( action , step )
    pause = delay
    if done:
        q_world.print_q_table( )
        print( "-------EPISODE DONE--------" )
        pause = delay * 2
    time.sleep( pause )

    

# Switch between a short, slow demo run and a long, undelayed training run.
isTraining_small = False

# maxwins: base used to size the run; delay: seconds to pause between prints
maxwins , delay = ( 10 , 1 ) if isTraining_small else ( 2000 , 0 )


In [5]:
# === Set Up Metadata === #
# NOTE(review): `maxwins` comes from the previous cell; `delay` is assigned
# again here, so the value chosen alongside `maxwins` is always overridden.
delay = 0      # seconds to sleep after each printed step/episode
w , h = 5 , 5  # grid size used only by the commented-out alternative board below
wins  = 0      # number of times goal state is reached
step  = 1      # initializing step counter for loop iteration below
number_printed    = 1
episode_count     = 10 * maxwins
episode_interval  = episode_count // number_printed  # an episode is printed every episode_interval episodes (the last episode is always printed too)

# step count of every winning episode (only the most recent `maxwins` are
# kept) - good indicator of learning
scores = deque( maxlen=maxwins )


# === Initialize the Board === #
# Start from the default world, grow it to 20 x 6, then place the goal,
# the walls and the holes as (x, y) coordinates.
q_world = QWorld( )
q_world.change_grid_dimensions( width=20 , height=6 )
q_world.replace_goal_states_list( [(19,5)] )
q_world.replace_wall_states_list( [(1,0),(1,1),(2,1),(3,1),
                                   (0,3),(1,3),(1,4),
                                   (3,3),(3,4),(3,5),(4,5),(5,5),
                                   (5,3),(5,2),(5,1),(5,0),
                                   (7,4),(8,4),(9,4),(10,4),(11,4),(7,3),(7,2),(8,2),(9,2),(10,2),(10,5),
                                   (7,0),(8,0),(9,0),(10,0),(11,0),(12,0),
                                   (12,2),(13,2),(14,2),(14,1),(15,1),(16,1),(17,1),(18,1),(19,1),
                                   (13,4),(14,4),(15,4),(16,4),(16,5),(17,5),(18,5),
                                   (16,3),(17,3),(18,3)] )
q_world.replace_hole_states_list( [(2,0),(0,4),(8,3),(6,0),(9,5),(11,5),(19,0),(15,5),(17,4)] )

# Alternative, smaller board (disabled):
#q_world.change_grid_dimensions( width=w , height=h )
#q_world.change_s0( (2,2) )
#q_world.replace_goal_states_list( [ (4,4) ] )
#q_world.replace_hole_states_list( [ (1,2) , (3,2) , (1,3) , (3,3) ] )
#q_world.replace_wall_states_list( [ (1,1) , (2,1) , (3,1) , (3,4) ] )


# state, action, reward, next state iteration
for episode in range( episode_count ):
    # === Reset Board and Show Start of New Episode === #
    # print only every episode_interval-th episode and the very last one
    do_print = episode % episode_interval == 0 or episode == (episode_count - 1)
    state = q_world.reset( )
    done  = False
    if do_print:
        print_episode( episode , delay=delay )
        print_status( q_world , done , step=0 , action=-1 , delay=delay )
    
    # === Act in Steps Until Terminal State Reached === #
    # NOTE(review): the loop only ends when a goal or hole is entered; there
    # is no cap on the number of steps per episode.
    while not done:
        # === Pick an Action, Do it, Learn, Print What was Done === #
        action = q_world.act( )  # Pick Action
        next_state, reward, done = q_world.step( action )  # Perform the Action
        q_world.update_q_table( state , action , reward , next_state )  # Learn from the Action
        if do_print:
            print_status( q_world , done , step , action , delay=delay )  # Print Action Taken
        
        # === Prepare for Next Step or Finish Episode === #
        state = next_state  # Update the current state
        # if episode is done, perform housekeeping
        if done:
            # only episodes that end on a goal count as wins and are scored
            if q_world.is_in_win_state( ):
                wins += 1
                scores.append( step )
            # Exploration-Exploitation is updated every episode
            q_world.update_epsilon( )  # more learned = less explore
            step = 1  # Reset the Step Counter
        else:
            step += 1  # Ready for Next Step

# === Print the Final Scores and Q Table === #
print( scores )
q_world.print_q_table( )

Episode  0
Step 0 : Initialize (Start)
---------------------------------------------------------------------------------
| [31mx[0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m 

| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [31mx[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
------------

| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [31mx[0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------

| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [31mx[0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
------------

| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------

| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [31mx[0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------

| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [31mx[0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
------------

| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 39 : Explore (Up)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W 

| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 44 : Explore (Left)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | 

| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 49 : Explore (Left)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | W | [34m [0m | W 

| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
------------

| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [31mx[0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 60 : Explore (Left)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | 

| [34m [0m | [34m [0m | [34m [0m | W | W | W | [31mx[0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 66 : Explore (Right)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W

| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [31mx[0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 71 : Explore (Up)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | 

| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 76 : Explore (Down)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | 

| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [31mx[0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 81 : Explore (Up)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W 

| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 86 : Explore (Up)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [31mx[0m | W | 

Episode  19999
Step 0 : Initialize (Start)
---------------------------------------------------------------------------------
| [31mx[0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [3

| [34m [0m | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
------------

| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [31mx[0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 11 : Exploit (Up)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | 

| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------

| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
------------

| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 28 : Exploit (Right)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m 

Step 34 : Exploit (Down)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [

In [6]:
# Show a single exploit-only episode to demonstrate what the agent has
# learned. Both epsilon values are forced to 0 for the duration of the
# episode (presumably q_world.act() explores with probability epsilon --
# confirm against QWorld.act) and are always restored afterwards.

# === Prepare the World === #
step    = 1
state   = q_world.reset()
done    = False

# === Explore Probability = 0 -- Save Values to Restore After === #
eps     = q_world.epsilon
eps_min = q_world.epsilon_min
q_world.epsilon     = 0
q_world.epsilon_min = 0

try:
    # === Perform Actions and Print Each Step Until Done === #
    print_episode( 'EXPLOIT ONLY' , delay=delay )
    print_status( q_world , done , step=0 , action=-1 , delay=delay )
    while not done:
        action = q_world.act( )
        next_state, reward, done = q_world.step( action )
        print_status( q_world , done , step , action , delay=delay )
        step += 1
finally:
    # === Restore Changes === #
    # Runs even if the loop raises or the kernel is interrupted (e.g. the
    # episode never reaches a terminal state), so later cells do not
    # inherit an agent whose exploration has been silently switched off.
    q_world.epsilon     = eps
    q_world.epsilon_min = eps_min

Episode  EXPLOIT ONLY
Step 0 : Initialize (Start)
---------------------------------------------------------------------------------
| [31mx[0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0

| [34m [0m | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
------------

| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [31mx[0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------

| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
---------------------------------------------------------------------------------
Step 16 : Exploit (Right)
---------------------------------------------------------------------------------
| [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | W | [34mH[0m | W | W | W | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m |
---------------------------------------------------------------------------------
| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W

| [34m [0m | W | W | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | W |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------

| [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | [34m [0m | W | W | W | W | [34m [0m | W | W | W | [34m [0m | [34m [0m | [31mx[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| W | W | [34m [0m | W | [34m [0m | W | [34m [0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m |
---------------------------------------------------------------------------------
| [34mH[0m | W | [34m [0m | W | [34m [0m | [34m [0m | [34m [0m | W | W | W | W | W | [34m [0m | W | W | W | W | [34mH[0m | [34m [0m | [34m [0m |
---------------------------------------------------------------------------------
| [34m [0m | [34m [0m | [34m [0m | W | W | W | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | [34mH[0m | [34m [0m | [34m [0m | [34m [0m | [34mH[0m | W | W | W | [34mG[0m |
------------