# Import Libraries

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Define the Grid environment class

In [117]:
class Grid:
    """Grid world holding a per-cell reward grid and a transition reward matrix.

    Cells are addressed as ``(row, col)`` and flattened row-major into a state
    index ``row * n_cols + col``.  ``R[s, t]`` is the reward for moving from
    state ``s`` to state ``t``; ``NaN`` marks a transition that is not allowed.

    Parameters
    ----------
    dims : tuple
        ``(n_rows, n_cols)`` of the grid.
    rewards : dict
        Reward values keyed ``'r_time'``, ``'r_work'`` and ``'r_<feature>'``
        for every feature name used in ``positions``.
    start, end : tuple
        ``(row, col)`` of the start and end cells.
    positions : dict
        Feature name -> list of ``(row, col)`` cells.  The keys ``'pond'``,
        ``'cogs'`` and ``'croissant'`` are used when building ``R``.
    tubes : list
        Pairs of ``(row, col)`` cells linked in both directions.
    walls : list
        Pairs ``[(row, col), (row, col)]``; the transition from the first cell
        to the second is removed from ``R``.
    max_steps, max_episodes : int
        Stored as given; not used by the code in this class.

    Notes
    -----
    The default ``rewards``/``positions``/``tubes``/``walls`` objects are shared
    between instances; this class never mutates them.
    """

    def __init__(self,
                 dims=(6,6),
                 rewards={'r_time':-1,'r_pond':-15,'r_croissant':200,'r_cogs':200,'r_work':15},
                 start=(0,1),
                 end=(5,5),
                 positions={'pond':[(2,4),(4,3)], 'cogs':[(5,2)], 'croissant':[(1,4)]},
                 tubes=[ [(0,0), (3,5)], [(1,2), (4,1)] ],
                 walls=[ [(0,2), (0,3)], [(1,2), (1,3)] ,[(2,2), (2,3)], [(1,5), (2,5)], [(3,0), (4,0)], [(3,1), (4,1)] ,[(5,2),(5,3)]],
                 max_steps=500,
                 max_episodes=1000,
                ):
        self._dims = dims
        self._rewards = rewards
        self._start = start
        self._end = end
        self._tubes = tubes
        self._walls = walls
        self._positions = positions
        self._max_steps = max_steps
        self._max_episodes = max_episodes

        self._initialize_grid()
        self._initialize_R_matrix()

    # ------------------------------------------------------------------
    # getters and setters
    # ------------------------------------------------------------------
    @property
    def dims(self):
        """Grid shape as ``(n_rows, n_cols)``; assigning rebuilds grid and R."""
        return self._dims

    @dims.setter
    def dims(self, dims):
        self._dims = dims
        self._initialize_grid()
        self._initialize_R_matrix()

    @property
    def start(self):
        """``(row, col)`` of the start cell."""
        return self._start

    @start.setter
    def start(self, start):
        self._start = start

    @property
    def end(self):
        """``(row, col)`` of the end cell; assigning rebuilds the grid."""
        return self._end

    @end.setter
    def end(self, end):
        self._end = end
        # The grid stores r_work at the end cell, so it must follow the change.
        self._initialize_grid()

    @property
    def tubes(self):
        """List of linked cell pairs; assigning rebuilds R."""
        return self._tubes

    @tubes.setter
    def tubes(self, tubes):
        self._tubes = tubes
        self._initialize_R_matrix()

    @property
    def max_steps(self):
        """Stored step limit."""
        return self._max_steps

    @max_steps.setter
    def max_steps(self, max_steps):
        self._max_steps = max_steps

    @property
    def max_episodes(self):
        """Stored episode limit."""
        return self._max_episodes

    # The setter must hang off the max_episodes property; decorating it with
    # max_steps.setter would rebind max_episodes to the max_steps getter.
    @max_episodes.setter
    def max_episodes(self, max_episodes):
        self._max_episodes = max_episodes

    @property
    def walls(self):
        """List of blocked cell pairs; assigning rebuilds R."""
        return self._walls

    @walls.setter
    def walls(self, walls):
        self._walls = walls
        self._initialize_R_matrix()

    @property
    def grid(self):
        """2-D array of per-cell rewards (read-only)."""
        return self._grid

    @property
    def R(self):
        """Transition reward matrix of shape ``(n_cells, n_cells)`` (read-only)."""
        return self._R

    @property
    def rewards(self):
        """Reward dictionary passed to the constructor (read-only)."""
        return self._rewards

    @property
    def positions(self):
        """Feature-position dictionary passed to the constructor (read-only)."""
        return self._positions

    # ------------------------------------------------------------------
    # construction helpers
    # ------------------------------------------------------------------
    def _cell_index(self, pos):
        """Return the row-major state index of a ``(row, col)`` position."""
        return pos[0] * self._dims[1] + pos[1]

    def _set_rewards(self, transitions, value):
        """Write ``value`` into ``R`` for every ``(from, to)`` pair given.

        An empty list is skipped explicitly: indexing ``R`` with an empty
        tuple would address (and overwrite) the whole matrix.
        """
        if not transitions:
            return
        sources, targets = zip(*transitions)
        self._R[list(sources), list(targets)] = value

    def _initialize_grid(self):
        """Build the reward grid from ``positions`` and ``end`` and print it."""
        self._grid = np.zeros(self.dims)
        for name, cells in self._positions.items():
            for row, col in cells:
                self._grid[row, col] = self._rewards['r_' + name]

        self._grid[self._end[0], self._end[1]] = self._rewards['r_work']
        print(self._grid)

    def _initialize_R_matrix(self):
        """Rebuild ``R`` from scratch.

        The order matters: later steps overwrite earlier ones, and walls are
        applied last so they override any reward on the same transition.
        """
        n_cells = self.dims[0] * self.dims[1]
        self._R = np.full((n_cells, n_cells), np.nan)

        self.__fillPossibleActions()
        self.__initializeTunnels()
        self.__initializeCrogs()
        self.__initializePonds()
        self.__initializeCroissants()
        self.__initializeWalls()

    def move_to(self, l, cell):
        """Append ``(neighbour, cell)`` for each in-grid neighbour of ``cell``.

        Neighbours are visited in the order up, left, right, down.  Moves that
        would leave the grid or wrap across a row edge are skipped.

        Parameters
        ----------
        l : list
            List extended in place with the transitions.
        cell : int
            Row-major state index of the destination cell.

        Returns
        -------
        list
            The same list ``l``.
        """
        n_rows, n_cols = self._dims
        row, col = divmod(cell, n_cols)
        if row > 0:
            l.append((cell - n_cols, cell))
        if col > 0:
            l.append((cell - 1, cell))
        if col < n_cols - 1:
            l.append((cell + 1, cell))
        if row < n_rows - 1:
            l.append((cell + n_cols, cell))
        return l

    def __fillPossibleActions(self):
        """Give every in-grid move (and staying put) the ``r_time`` reward."""
        n_rows, n_cols = self._dims
        moves = []
        for row in range(n_rows):
            for col in range(n_cols):
                cell = row * n_cols + col
                if col < n_cols - 1:
                    moves.append((cell, cell + 1))       # right
                if row > 0:
                    moves.append((cell, cell - n_cols))  # up
                if row < n_rows - 1:
                    moves.append((cell, cell + n_cols))  # down
                if col > 0:
                    moves.append((cell, cell - 1))       # left
                moves.append((cell, cell))               # stay in place

        self._set_rewards(moves, self._rewards['r_time'])

    def __initializeTunnels(self):
        """Link both ends of every tube, in both directions, at ``r_time``."""
        links = []
        for first, second in self._tubes:
            a = self._cell_index(first)
            b = self._cell_index(second)
            links.append((a, b))
            links.append((b, a))

        self._set_rewards(links, self._rewards['r_time'])

    def __initializeCrogs(self):
        """Reward moves from a neighbouring cell into a cogs cell with ``r_cogs``."""
        cogs = []
        for cog in self._positions.get('cogs', ()):
            self.move_to(cogs, self._cell_index(cog))

        self._set_rewards(cogs, self._rewards['r_cogs'])

    def __initializePonds(self):
        """Apply ``r_pond`` to moves into a pond cell and to staying in it."""
        ponds = []
        for pond in self._positions.get('pond', ()):
            p = self._cell_index(pond)
            self.move_to(ponds, p)
            ponds.append((p, p))

        self._set_rewards(ponds, self._rewards['r_pond'])

    def __initializeCroissants(self):
        """Reward moves from a neighbouring cell into a croissant cell with ``r_croissant``."""
        croissants = []
        for croissant in self._positions.get('croissant', ()):
            self.move_to(croissants, self._cell_index(croissant))

        self._set_rewards(croissants, self._rewards['r_croissant'])

    def __initializeWalls(self):
        """Remove the first-cell -> second-cell transition of every wall."""
        for wall in self._walls:
            source = self._cell_index(wall[0])
            target = self._cell_index(wall[1])
            # NOTE(review): only source -> target is blocked; the reverse move
            # stays allowed. Confirm whether walls are meant to be two-way.
            self._R[source, target] = np.nan
        

In [119]:
# Build the environment with the default layout; the constructor prints the
# per-cell reward grid shown in the output below.
grid = Grid()

[[  0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0. 200.   0.]
 [  0.   0.   0.   0. -15.   0.]
 [  0.   0.   0.   0.   0.   0.]
 [  0.   0.   0. -15.   0.   0.]
 [  0.   0. 200.   0.   0.  15.]]


In [118]:
def display_R(matrix, start=None, end=None):
    """Render a square, label-based slice of a matrix as a DataFrame.

    Parameters
    ----------
    matrix : array-like
        2-D data accepted by ``pd.DataFrame``.
    start, end : int or None
        Inclusive row/column labels of the slice (``.loc`` semantics);
        ``None`` leaves that side open.

    Notes
    -----
    All columns are shown for this call only: the pandas display option is
    scoped with ``option_context`` so the global setting is left untouched.
    ``display`` is the function IPython/Jupyter makes available in notebooks.
    """
    frame = pd.DataFrame(matrix).loc[start:end, start:end]
    with pd.option_context("display.max_columns", None):
        display(frame)
    
# Show the lower-right block of R: rows and columns for state indices 25 onward.
display_R(grid.R,25)

Unnamed: 0,25,26,27,28,29,30,31,32,33,34,35
25,-1.0,-1.0,,,,,-1.0,,,,
26,-1.0,-1.0,-15.0,,,,,200.0,,,
27,,-1.0,-15.0,-1.0,,,,,-1.0,,
28,,,-15.0,-1.0,-1.0,,,,,-1.0,
29,,,,-1.0,-1.0,,,,,,-1.0
30,,,,,,-1.0,-1.0,,,,
31,-1.0,,,,,-1.0,-1.0,200.0,,,
32,,-1.0,,,,,-1.0,-1.0,,,
33,,,-15.0,,,,,200.0,-1.0,-1.0,
34,,,,-1.0,,,,,-1.0,-1.0,-1.0


In [70]:
# Raw transition reward matrix; NaN entries are transitions that were never
# enabled or were removed by a wall.
grid.R

array([[-1., -1., nan, ..., nan, nan, nan],
       [-1., -1., -1., ..., nan, nan, nan],
       [nan, -1., -1., ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., -1., -1., nan],
       [nan, nan, nan, ..., -1., -1., -1.],
       [nan, nan, nan, ..., nan, -1., -1.]])