In [1]:
import math
import numpy as np

In [24]:
# Grid layout: ' ' is a free cell, '#' a wall, '@' the agent's start cell,
# and a numeric entry is a terminal cell carrying that reward.
book_grid = [
    [' ', ' ', ' ', 10],
    [' ', '#', ' ', -10],
    ['@', ' ', ' ', ' '],
]

class GridWorld(object):
    """
    This is a nice approximation for all the complexity of the entire universe 
        –Charles Isbell

    A rectangular grid MDP. Every non-wall cell is a state, numbered in
    row-major order starting at 0. Numeric cells (int or float) are terminal
    states whose value is the reward; all other cells yield ``living_reward``.
    """
    MARKER_WALL = '#'
    MARKER_CURR_STATE = '@'
    MARKER_FREE_SPACE = ' '

    def __init__(self, grid, living_reward=-0.01, a_noise_dist=None):
        """
        Build a grid world from a 2-D grid specification.

        Grid spec: '#' walls, numeric (int/float) terminal state with that
        reward, '@' initial position, ' ' free space.

        Args:
            grid: non-empty sequence of non-empty rows.
            living_reward: reward returned for every non-terminal state.
            a_noise_dist: probabilities of actually executing, in order, the
                action one step counter-clockwise of the requested one, the
                requested action itself, and the one clockwise of it.
                Defaults to [0.1, 0.8, 0.1].

        Raises:
            ValueError: if ``grid`` or its first row is empty.
        """
        if len(grid) == 0 or len(grid[0]) == 0:
            raise ValueError("grid must have at least one row and one column")

        # None sentinel avoids sharing one mutable default list between instances.
        if a_noise_dist is None:
            a_noise_dist = [0.1, 0.8, 0.1]

        # Copy every row so later edits to the caller's grid cannot leak in.
        self.grid = [list(row) for row in grid]
        self.width = len(grid[0])
        self.height = len(grid)
        self.living_reward = living_reward
        self.a_noise_dist = a_noise_dist
        # (row delta, col delta) per action, listed clockwise so that
        # neighbouring ids are the "slip" directions used by move().
        self.actions = {"North": (-1, 0), "East": (0, 1), "South": (1, 0), "West": (0, -1)}
        self.action_str_to_id = {name: i for i, name in enumerate(self.actions)}  # North: 0, East: 1, ...
        self.action_id_to_str = {i: name for i, name in enumerate(self.actions)}  # 0: North, 1: East, ...
        self.n_actions = len(self.actions)
        self.curr_state, self.states, self.states_to_feture_vec, self.feature_vec_to_state = self._compute_states(grid)
        self.n_states = len(self.states)
        # Feature function: state id -> feature vector ((row, col) by default).
        self.f_sj = lambda state: self.states_to_feture_vec[state]

    def _compute_states(self, grid, feature_kind='2d_loc'):
        """
        Enumerate the non-wall cells of ``grid`` in row-major order.

        Returns:
            (curr_state, states, states_to_feture_vec, feature_vec_to_state)
            where ``curr_state`` is the id of the '@' cell (None if the grid
            has none) and the two dicts map state id <-> (row, col). The
            dicts are only filled when ``feature_kind`` is '2d_loc'.
        """
        n = 0
        curr_state = None
        states = []
        states_to_feture_vec = {}
        feature_vec_to_state = {}
        for row in range(self.height):
            for col in range(self.width):
                cell = grid[row][col]
                if cell == self.MARKER_WALL:
                    continue
                if cell == self.MARKER_CURR_STATE:
                    curr_state = n
                states.append(n)
                if feature_kind == '2d_loc':
                    states_to_feture_vec[n] = (row, col)
                    feature_vec_to_state[(row, col)] = n
                n += 1
        return curr_state, states, states_to_feture_vec, feature_vec_to_state

    @staticmethod
    def _is_reward_cell(cell):
        """Return True if a grid cell holds a numeric (terminal) reward."""
        return isinstance(cell, (int, float))

    def __is_terminal(self, state):
        """Return True if ``state`` sits on a numeric (terminal) cell."""
        r, c = self.f_sj(state)
        return self._is_reward_cell(self.grid[r][c])

    def get_states(self):
        """Return the list of all state ids."""
        return self.states

    def get_curr_state(self):
        """Return the id of the state the agent currently occupies."""
        return self.curr_state

    def get_feature_matrix(self, states):
        """Return the feature vector of every state in ``states``, in order."""
        return [self.f_sj(sj) for sj in states]

    def get_possible_actions(self, state):
        """Return the action dict, or None when ``state`` is terminal."""
        if self.__is_terminal(state):
            return None

        return self.actions

    def T(self, state, a_id):
        """
        Apply action ``a_id`` deterministically from ``state``.

        Moving off the grid or into a wall leaves the position unchanged.
        NOTE: besides returning the resulting state id, this also stores it
        in ``self.curr_state``; move() relies on that side effect.
        """
        d_row, d_col = self.actions[self.action_id_to_str[a_id]]
        r, c = self.f_sj(state)
        new_r, new_c = r + d_row, c + d_col

        blocked = (
            not (0 <= new_r < self.height)
            or not (0 <= new_c < self.width)
            or self.grid[new_r][new_c] == self.MARKER_WALL
        )
        if blocked:
            new_r, new_c = r, c

        self.curr_state = self.feature_vec_to_state[(new_r, new_c)]
        return self.curr_state

    def move(self, stochastic_action):
        """
        Move the agent by the named action, subject to action noise.

        The executed action is sampled from the requested action and its two
        neighbours in the action ordering with probabilities
        ``a_noise_dist``. Returns the new current state id.
        """
        na = self.n_actions
        a_id = self.action_str_to_id[stochastic_action]
        # Wrap both neighbours: without the modulo, "North" (id 0) could
        # yield id -1, which is not a valid action id.
        candidates = [(a_id - 1) % na, a_id, (a_id + 1) % na]
        a_id = int(np.random.choice(candidates, p=self.a_noise_dist))
        return self.T(self.curr_state, a_id)

    def get_reward(self, state=None, action=None):
        """
        Return the reward of ``state`` (the current state when None).

        Terminal cells return their numeric value as a float; every other
        cell returns ``living_reward``. ``action`` is accepted but unused.
        """
        if state is None:
            state = self.curr_state
        r, c = self.f_sj(state)
        cell = self.grid[r][c]
        if self._is_reward_cell(cell):
            return float(cell)
        return self.living_reward

    def __str__(self):
        """
        Render the grid as text, one line per row.

        Each cell is centred in a 5-character field padded with '_' and
        followed by a tab. The agent's current cell shows '@', terminal
        cells show their signed reward, walls show '#'.
        """
        cell_filler = "_"
        # None when the grid had no start marker; then no cell is marked.
        curr_pos = self.states_to_feture_vec.get(self.curr_state)
        lines = []
        for r in range(self.height):
            cells = []
            for c in range(self.width):
                cell = self.grid[r][c]
                if (r, c) == curr_pos:
                    tt = self.MARKER_CURR_STATE
                elif cell == self.MARKER_FREE_SPACE or cell == self.MARKER_CURR_STATE:
                    # A leftover '@' in the stored grid is the vacated start cell.
                    tt = cell_filler
                elif self._is_reward_cell(cell):
                    # '+g' prints ints exactly as '+d' did and also accepts floats.
                    tt = "{:+g}".format(cell)
                else:
                    tt = cell
                cells.append("{txt:{fill}^5s}".format(txt=tt, fill=cell_filler) + "\t")
            lines.append("".join(cells) + "\n")
        return "".join(lines) + "\n"
        

In [25]:
# Noise distribution [0, 1, 0] puts all probability on the requested action,
# so every move below is deterministic.
gw = GridWorld(book_grid, a_noise_dist=[0, 1.0, 0])

In [26]:
# Render the grid through GridWorld.__str__; '@' marks the agent's current cell.
print(gw)

_____	_____	_____	_+10_	
_____	__#__	_____	_-10_	
__@__	_____	_____	_____	




In [27]:
# Reward of the cell the agent currently occupies; the start cell is not
# numeric, so this is the living reward.
gw.get_reward(gw.curr_state)

-0.01

In [28]:
# Walk up the left column (the third "North" bumps into the top border),
# then along the top row into the +10 terminal cell.
As = ["North"] * 3 + ["East"] * 3
for a in As:
    gw.move(a)
    print(gw)
    print("Reward: ", gw.get_reward())

_____	_____	_____	_+10_	
__@__	__#__	_____	_-10_	
_____	_____	_____	_____	


Reward:  -0.01
__@__	_____	_____	_+10_	
_____	__#__	_____	_-10_	
_____	_____	_____	_____	


Reward:  -0.01
__@__	_____	_____	_+10_	
_____	__#__	_____	_-10_	
_____	_____	_____	_____	


Reward:  -0.01
_____	__@__	_____	_+10_	
_____	__#__	_____	_-10_	
_____	_____	_____	_____	


Reward:  -0.01
_____	_____	__@__	_+10_	
_____	__#__	_____	_-10_	
_____	_____	_____	_____	


Reward:  -0.01
_____	_____	_____	__@__	
_____	__#__	_____	_-10_	
_____	_____	_____	_____	


Reward:  10.0
