In [238]:
import math
import numpy as np

In [239]:
# Default 3x4 layout handed to GridWorld: '#' is a wall, '@' the start cell,
# ' ' free space, and the numeric cells carry the rewards +10 / -10.
book_grid = [
    [' ', ' ', ' ', +10],
    [' ', '#', ' ', -10],
    ['@', ' ', ' ', ' '],
]

class GridWorld(object):
    """
    This is a nice approximation for all the complexity of the entire universe
        –Charles Isbell

    Every non-wall cell of the grid is a state. States are numbered
    0..n-1 in row-major order, skipping walls.

    Cell values:
        '#'        wall (not a state)
        '@'        initial position of the agent
        ' '        free space
        int/float  terminal state; the cell value is its reward
    """
    MARKER_WALL = '#'
    MARKER_CURR_STATE = '@'
    MARKER_FREE_SPACE = ' '

    def __init__(self, grid=None, living_reward=0.0, noise=0.1):
        """
        Returns a grid world with input specifications.

        Grid spec: '#' walls, '<int>' terminal state, '@' initial position,
        ' ' free space

        Args:
            grid: list of rows, each row a list of cells. When omitted (or
                None) the module-level ``book_grid`` is used.
            living_reward: reward returned for every non-terminal state.
            noise: stored on the instance as ``self.noise``.
        """
        if grid is None:
            # Resolved at call time instead of at class-definition time, so
            # the class does not capture a shared mutable list as a default.
            grid = book_grid

        self.grid = grid
        self.living_reward = living_reward
        self.noise = noise
        self.actions = ("North", "East", "South", "West")
        self.n_actions = len(self.actions)
        self.curr_state, self.states, self.states_feture_vec = self._compute_states(grid)
        self.n_states = len(self.states)
        self.f_sj = lambda state: self.states_feture_vec[state]
        self.feature_matrix = self.get_feature_matrix(self.states)

    @staticmethod
    def _is_reward_cell(cell):
        """Return True when *cell* holds a number, i.e. marks a terminal state."""
        return isinstance(cell, (int, float))

    def _compute_states(self, grid, feature_kind='2d_loc'):
        """
        Enumerate the non-wall cells of *grid* in row-major order.

        Returns:
            (curr_state, states, states_feture_vec) where ``curr_state`` is
            the index of the cell holding '@' (None if there is none; the
            last one wins if there are several), ``states`` is the list of
            state indices and ``states_feture_vec`` maps each index to its
            (row, col) position. The mapping is only filled in for
            ``feature_kind == '2d_loc'``.
        """
        curr_state = None
        states = []
        states_feture_vec = {}
        for row, cells in enumerate(grid):
            for col, cell in enumerate(cells):
                if cell == self.MARKER_WALL:
                    continue
                n = len(states)
                if cell == self.MARKER_CURR_STATE:
                    curr_state = n
                states.append(n)
                if feature_kind == '2d_loc':
                    states_feture_vec[n] = (row, col)
        return curr_state, states, states_feture_vec

    def __is_terminal(self, state):
        """Return True when *state* sits on a numeric (reward) cell."""
        r, c = self.feature_matrix[state]
        return self._is_reward_cell(self.grid[r][c])

    def get_states(self):
        """Return the list of state indices."""
        return self.states

    def get_curr_state(self):
        """Return the index of the current state (None if the grid has no '@')."""
        return self.curr_state

    def get_curr_state_feat_vec(self, state):
        """Return the feature vector, here the (row, col) position, of *state*."""
        return self.f_sj(state)

    def get_feature_matrix(self, states):
        """Return the feature vectors of *states* as a list, in the same order."""
        return [self.f_sj(sj) for sj in states]

    def get_possible_actions(self, state):
        """Return the action tuple, or None when *state* is terminal."""
        if self.__is_terminal(state):
            return None

        return self.actions

    def get_reward(self, state, action):
        """
        Return the reward of *state*: the cell value for terminal states,
        ``living_reward`` otherwise. *action* is not used.
        """
        r, c = self.feature_matrix[state]
        cell = self.grid[r][c]
        if self._is_reward_cell(cell):
            return cell
        return self.living_reward

    def __str__(self):
        """
        Render the grid as text: one line per row, every cell centred in a
        5-character field padded with '_' and followed by a tab, plus a
        trailing blank line.
        """
        cell_filler = "_"
        # A grid without '@' has no current state; draw it without a marker.
        if self.curr_state is None:
            curr_pos = None
        else:
            curr_pos = tuple(self.feature_matrix[self.curr_state])

        lines = []
        for r, row in enumerate(self.grid):
            cells = []
            for c, cell in enumerate(row):
                if (r, c) == curr_pos:
                    # Checked first so the agent is drawn even when it
                    # stands on a free-space cell.
                    txt = self.MARKER_CURR_STATE
                elif cell in (self.MARKER_FREE_SPACE, self.MARKER_CURR_STATE):
                    # Free space, or a start cell the agent is not on.
                    txt = cell_filler
                elif self._is_reward_cell(cell):
                    # '+d' rejects floats, so float rewards use '+g'.
                    fmt = "{:+d}" if isinstance(cell, int) else "{:+g}"
                    txt = fmt.format(cell)
                else:
                    txt = str(cell)
                cells.append("{txt:{fill}^5s}".format(txt=txt, fill=cell_filler) + "\t")
            lines.append("".join(cells) + "\n")
        return "".join(lines) + "\n"
        

In [240]:
# Build the example world from the grid literal defined earlier in the file.
gw = GridWorld(grid=book_grid)

In [241]:
# Show the text rendering produced by GridWorld.__str__.
print(str(gw))

_____	_____	_____	_+10_	
_____	__#__	_____	_-10_	
__@__	_____	_____	_____	


