In [2]:
import unittest

import numpy as np

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]
goal = [len(grid)-1, len(grid[0])-1]
cost = 1 # the cost associated with moving from a cell to an adjacent one

delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

delta_name = ['^', '<', 'v', '>']

# Dynamic programming and policy

In [3]:
def add_tuples(a,b):
    return (a[0] + b[0], a[1] + b[1])


class Map:
    def __init__(self, grid, goal, cost):
        self.grid = grid
        self.goal = tuple(goal)
        self.cost = cost
        self.dims = self.grid.shape
        
        self.value = np.full(self.grid.shape, 99)
        self.open = [(0, ' ', self.goal)]
        
        self.delta = [[-1, 0 ], # go up
                      [ 0, -1], # go left
                      [ 1, 0 ], # go down
                      [ 0, 1 ]] # go right
        
        # given a move, provides the reverse of this move in text
        self.rev_move_lookup = {
            (-1,  0): 'v',
            ( 0, -1): '>',
            ( 1,  0): '^',
            ( 0,  1): '<',
        }
        
        self.policy = np.full(self.dims, ' ')


    def valid_loc(self, loc):
        x,y = loc
        return (0 <= x < self.dims[0] and 0 <= y < self.dims[1] and self.grid[loc] == 0)
    
    
    def next_expand(self, iters=1):
        for i in range(iters):
            if len(self.open) == 0:
                return

            self.open.sort()
            d, loc_policy, loc = self.open.pop(0)

            if self.value[loc] != 99:
                continue

            self.value[loc] = d
            self.policy[loc] = loc_policy

            for move in self.delta:
                new_loc = add_tuples(loc, move)

                if self.valid_loc(new_loc) and self.value[new_loc] == 99:
                    implied_policy = self.rev_move_lookup[tuple(move)]
                    self.open.append((d + self.cost, implied_policy, new_loc))
                    
                
    def iter_until_solved(self):
        while len(self.open) > 0:
            self.next_expand()
    
    
class DynamicProgTests(unittest.TestCase):
    
    def setUp(self):
        self.grid = np.array([[0, 1, 0, 0, 0, 0],
                              [0, 1, 0, 0, 0, 0],
                              [0, 1, 0, 0, 0, 0],
                              [0, 1, 0, 0, 0, 0],
                              [0, 0, 0, 0, 1, 0]])
        self.goal = (len(self.grid)-1, len(self.grid[0])-1)
        self.cost = 1
        self.m = Map(self.grid, self.goal, self.cost)
    
    
    def test_initialization(self):
        self.assertEqual(self.m.grid.shape, (5,6))
        self.assertEqual(self.m.value.shape, self.m.grid.shape)
        self.assertEqual(self.m.value[(1,1)], 99)

        
    def test_next_expand(self):
        self.m.next_expand()
        self.assertEqual(self.m.value[self.m.goal], 0)
        
        self.m.next_expand()
        self.assertEqual(self.m.value[(3,5)], 1)
        
        self.m.iter_until_solved()
        expected_value = np.array([[11, 99,  7,  6,  5,  4],
                                   [10, 99,  6,  5,  4,  3],
                                   [ 9, 99,  5,  4,  3,  2],
                                   [ 8, 99,  4,  3,  2,  1],
                                   [ 7,  6,  5,  4, 99,  0]])
        self.assertTrue(np.array_equal(self.m.value, expected_value))
        
        
    def test_policy(self):
        self.m.next_expand()
        expected_policy = np.full(self.m.dims, ' ')
        self.assertTrue(np.array_equal(expected_policy, self.m.policy))
        
        self.m.next_expand(iters=3)
        expected_policy[3, 5] = 'v'
        expected_policy[2, 5] = 'v'
        expected_policy[3, 4] = '>'
        self.assertTrue(np.array_equal(expected_policy, self.m.policy))
        
        self.m.iter_until_solved()
        expected_policy = np.array([['v', ' ', '>', '>', '>', 'v'],
                                    ['v', ' ', '>', '>', '>', 'v'],
                                    ['v', ' ', '>', '>', '>', 'v'],
                                    ['v', ' ', '>', '>', '>', 'v'],
                                    ['>', '>', '>', '^', ' ', ' ']])
        self.assertTrue(np.array_equal(expected_policy, self.m.policy))
        
        
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit = False)

...
----------------------------------------------------------------------
Ran 3 tests in 0.008s

OK


# Adding left turns 

In [4]:
# ----------
# User Instructions:
# 
# Implement the function optimum_policy2D below.
#
# You are given a car in grid with initial state
# init. Your task is to compute and return the car's 
# optimal path to the position specified in goal; 
# the costs for each motion are as defined in cost.
#
# There are four motion directions: up, left, down, and right.
# Increasing the index in this array corresponds to making a
# a left turn, and decreasing the index corresponds to making a 
# right turn.

forward = [[-1,  0], # go up
           [ 0, -1], # go left
           [ 1,  0], # go down
           [ 0,  1]] # go right
forward_name = ['up', 'left', 'down', 'right']

# action has 3 values: right turn, no turn, left turn
action = [-1, 0, 1]
action_name = ['R', '#', 'L']

# EXAMPLE INPUTS:
# grid format:
#     0 = navigable space
#     1 = unnavigable space 
grid = [[1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0],
        [1, 1, 1, 0, 1, 1],
        [1, 1, 1, 0, 1, 1]]

init = [4, 3, 0] # given in the form [row,col,direction]
                 # direction = 0: up
                 #             1: left
                 #             2: down
                 #             3: right
                
goal = [2, 0] # given in the form [row,col]

cost = [2, 1, 20] # cost has 3 values, corresponding to making 
                  # a right turn, no turn, and a left turn

# EXAMPLE OUTPUT:
# calling optimum_policy2D with the given parameters should return 
# [[' ', ' ', ' ', 'R', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', '#'],
#  ['*', '#', '#', '#', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', ' '],
#  [' ', ' ', ' ', '#', ' ', ' ']]
# ----------

In [5]:
['a'] * 4

['a', 'a', 'a', 'a']

In [11]:
for i, n in enumerate(['right', 'straight', 'left']):
    print('i = {}; i -1 = {}; n = {}'.format(i, i -1, n))

i = 0; i -1 = -1; n = right
i = 1; i -1 = 0; n = straight
i = 2; i -1 = 1; n = left


In [12]:
[1] + [2,3]

[1, 2, 3]

In [30]:
b = 2
(1 < b < 3) & (0 < 2)

True

In [68]:
def add_tuples(a,b):
    return (a[0] + b[0], a[1] + b[1])


def rev_move(a):
    return [-1 * d for d in a]


class Map:
    def __init__(self, grid, goal, init, cost, action, action_name):
        self.grid = grid
        g_temp = np.reshape(self.grid, (1,) + self.grid.shape)
        self.grid_space = np.vstack([g_temp] * 4)
        
        self.goal = tuple(goal)
        self.init = (init[2], init[0], init[1])
        self.cost = cost
        self.dims = self.grid_space.shape
        
        self.value = np.full(self.grid_space.shape, 999)
        self.open = [(0, '*', (i,) + self.goal) for i in range(4)]
        
        self.delta = [[-1, 0 ], # go up
                      [ 0, -1], # go left
                      [ 1, 0 ], # go down
                      [ 0, 1 ]] # go right
        
        self.action = action
        self.action_name = action_name
        
        self.policy = np.full(self.grid_space.shape, ' ')
        
        # init goal location in policy
        for i in range(4):
            self.policy[(i,) + self.goal] = '*'
        

    def valid_loc(self, loc):
        x, y = loc
        return (0 <= x < self.dims[1] and 0 <= y < self.dims[2] and self.grid[loc] == 0)
    
    
    def next_expand(self, iters=1):
        for i in range(iters):
            if len(self.open) == 0:
                return

            self.open.sort()
            d, loc_policy, loc = self.open.pop(0)

            if self.value[loc] != 999:
                continue

            if loc == self.init:
                self.value[loc] = d
                self.policy[loc] = loc_policy
                return
                
            prev_loc = add_tuples(rev_move(self.delta[loc[0]]), loc[1:])
            
            
            if not self.valid_loc(prev_loc):
                continue
                    
            else:
                self.value[loc] = d
                self.policy[loc] = loc_policy
            
                # consider any of the moves that could have gotten you here in this orientation
                for i, action in enumerate(self.action):
                    new_orientation =  (loc[0] + 1 - i) % 4
                    new_loc = (new_orientation,) + prev_loc
                    cost = self.cost[i]
                    policy = self.action_name[i]
                    
                    self.open.append((d + cost, policy, new_loc))

                    
    def iter_until_solved(self):
        while len(self.open) > 0 and self.value[self.init] == 999:
            self.next_expand()
    
    
class DynamicProgTests(unittest.TestCase):
    
    def setUp(self):
        self.grid = np.array(grid)
        self.goal = goal
        self.init = init
        self.cost = cost
        self.action = action
        self.action_name = action_name
        self.m = Map(self.grid, self.goal, self.init,
                     self.cost, self.action, self.action_name)
    
    
    def test_initialization(self):
        self.assertEqual(self.m.grid_space.shape, (4, 5,6))
        self.assertEqual(self.m.value.shape, self.m.grid_space.shape)
        
        
    def test_next_expand(self):
        # TODO rewrite tests
        self.m.next_expand()
        
        self.m.next_expand()
        
        self.m.iter_until_solved()

        print(self.m.policy)
        

    # TODO test policy and value
       
        
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit = False)

..

[[[' ' ' ' ' ' 'R' ' ' ' ']
  [' ' ' ' ' ' '#' ' ' ' ']
  ['*' ' ' ' ' '#' ' ' ' ']
  [' ' ' ' ' ' '#' ' ' ' ']
  [' ' ' ' ' ' '#' ' ' ' ']]

 [[' ' ' ' ' ' ' ' ' ' ' ']
  [' ' ' ' ' ' ' ' ' ' ' ']
  ['*' '#' '#' '#' '#' ' ']
  [' ' ' ' ' ' ' ' ' ' ' ']
  [' ' ' ' ' ' ' ' ' ' ' ']]

 [[' ' ' ' ' ' ' ' ' ' ' ']
  [' ' ' ' ' ' '#' ' ' '#']
  ['*' ' ' ' ' 'R' ' ' 'R']
  [' ' ' ' ' ' ' ' ' ' ' ']
  [' ' ' ' ' ' ' ' ' ' ' ']]

 [[' ' ' ' ' ' ' ' '#' 'R']
  [' ' ' ' ' ' ' ' ' ' ' ']
  ['*' ' ' ' ' ' ' ' ' ' ']
  [' ' ' ' ' ' ' ' ' ' ' ']
  [' ' ' ' ' ' ' ' ' ' ' ']]]



----------------------------------------------------------------------
Ran 2 tests in 0.004s

OK
