In [77]:
# ----------
# User Instructions:
# 
# Define a function, search() that returns a list
# in the form of [optimal path length, row, col]. For
# the grid shown below, your function should output
# [11, 4, 5].
#
# If there is no valid path from the start point
# to the goal, your function should return the string
# 'fail'
# ----------

# Grid format:
#   0 = Navigable space
#   1 = Occupied space


# Example world for this cell: 5 rows x 6 columns, 0 = free, 1 = occupied.
grid = [[0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 0],
        [0, 0, 1, 1, 1, 0],
        [0, 0, 0, 0, 1, 0]]
# Start in the top-left cell, given as [row, col].
init = [0, 0]
# Goal is the bottom-right cell of the grid, given as [row, col].
goal = [len(grid)-1, len(grid[0])-1]
# Cost of a single move.
cost = 1

# Possible moves as [row offset, col offset]; read as a global by the search functions.
delta = [[-1, 0], # go up
         [ 0,-1], # go left
         [ 1, 0], # go down
         [ 0, 1]] # go right

# Display symbol for each move, in the same order as delta.
delta_name = ['^', '<', 'v', '>']

def search(grid,init,goal,cost):
    """Breadth-first search for the cheapest path from ``init`` to ``goal``.

    Every move costs the same, so cells are discovered in order of
    non-decreasing path cost and the first time the goal is discovered its
    cost is optimal.

    Args:
        grid: 2D list; 0 marks a navigable cell, any other value is blocked.
        init: [row, col] of the start cell.
        goal: [row, col] of the target cell.
        cost: cost of a single move.

    Returns:
        [path_cost, row, col] for the goal, or the string 'fail' when no
        path exists.

    Reads the module-level ``delta`` list of [row offset, col offset] moves.
    """
    from collections import deque

    # The start may already be the goal; the expansion loop below only ever
    # tests newly discovered neighbours, so this case needs its own check.
    if init[0] == goal[0] and init[1] == goal[1]:
        return [0, init[0], init[1]]

    n_rows = len(grid)
    n_cols = len(grid[0])

    # One flag per cell: True once the cell has been put on the frontier.
    # A discovered cell is never queued again, so no separate closed list
    # is needed.
    seen = [[False] * n_cols for _ in range(n_rows)]
    seen[init[0]][init[1]] = True

    # FIFO queue of (g-value, row, col); deque gives O(1) pops from the front.
    frontier = deque([(0, init[0], init[1])])

    while frontier:
        g, row, col = frontier.popleft()

        for move in delta:
            new_row = row + move[0]
            new_col = col + move[1]

            # Skip moves that leave the grid, hit an obstacle, or lead to a
            # cell that has already been discovered.
            if not (0 <= new_row < n_rows and 0 <= new_col < n_cols):
                continue
            if grid[new_row][new_col] != 0 or seen[new_row][new_col]:
                continue

            seen[new_row][new_col] = True

            # Compare coordinates rather than list objects so that a goal
            # given as a tuple is recognised as well.
            if new_row == goal[0] and new_col == goal[1]:
                return [g + cost, new_row, new_col]

            frontier.append((g + cost, new_row, new_col))

    # The frontier ran dry without reaching the goal.
    return "fail"

def search2(grid,init,goal,cost):
    """Breadth-first sweep that returns the table of discovery steps.

    Works like the plain search, but instead of the path cost it hands back
    a grid with the same shape as ``grid`` where each cell holds the running
    counter value at the moment the cell was first put on the open list.
    The start cell holds 0 and cells that were never reached hold -1.

    The table is returned as soon as the goal is discovered, or once the
    open list is exhausted.

    Reads the module-level ``delta`` list of [row offset, col offset] moves.
    """
    n_rows = len(grid)
    n_cols = len(grid[0])

    # -1 means "never reached"; the start cell is step 0.
    expand = [[-1] * n_cols for _ in range(n_rows)]
    step = 0
    expand[init[0]][init[1]] = step

    frontier = [(0, init)]  # FIFO list of (g-value, cell) still to process
    done = []               # cells already taken off the frontier

    while frontier:
        g_here, here = frontier.pop(0)
        done.append(here)

        for move in delta:
            row = here[0] + move[0]
            col = here[1] + move[1]
            candidate = [row, col]

            # Off the map: try the next move.
            if not (0 <= row < n_rows and 0 <= col < n_cols):
                continue
            # Already processed, blocked, or already numbered: try the next move.
            if candidate in done or grid[row][col] != 0 or expand[row][col] > -1:
                continue

            step += 1
            expand[row][col] = step

            if candidate == goal:
                return expand

            frontier.append((g_here + cost, candidate))

    return expand

# Print the expansion table that search2 produces for the example grid above.
print(search2(grid,init,goal,cost))

[[0, 2, -1, 15, 17, 19], [1, 4, -1, 13, 16, 18], [3, 6, 9, 11, -1, 20], [5, 8, -1, -1, -1, 21], [7, 10, 12, 14, -1, 22]]


In [None]:
##### Do Not Modify ######

import grader

try:
    response = grader.run_grader(search)
    print(response)    
    
except Exception as err:
    print(str(err))

In [None]:
##### SOLUTION: Run this cell to watch the solution video ######
from IPython.display import HTML
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/cl8Kdkr4Gbg" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')

<H3>Adding Sebastian Thrun's solution</H3>
And doing the next quiz - print out the path.

In [93]:
# -----------
# User Instructions:
#
# Modify the search function so that it returns
# a shortest path as follows:
# 
# [['>', 'v', ' ', ' ', ' ', ' '],
#  [' ', '>', '>', '>', '>', 'v'],
#  [' ', ' ', ' ', ' ', ' ', 'v'],
#  [' ', ' ', ' ', ' ', ' ', 'v'],
#  [' ', ' ', ' ', ' ', ' ', '*']]
#
# Where '>', '<', '^', and 'v' refer to right, left, 
# up, and down motions. Note that the 'v' should be 
# lowercase. '*' should mark the goal cell.
#
# You may assume that all test cases for this function
# will have a path from init to goal.
# ----------

# Example world for the path-printing quiz: 0 = free, 1 = occupied.
grid = [[0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 1, 0],
        [0, 0, 1, 0, 1, 0],
        [0, 0, 1, 0, 1, 0]]

# Start at the top-left cell and aim for the bottom-right cell ([row, col]).
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1]
# Cost of a single move.
cost = 1

# Possible moves as [row offset, col offset]; read as a global by search().
delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

# Display symbol for each move, in the same order as delta.
delta_name = ['^', '<', 'v', '>']

def search(grid,init,goal,cost):
    """Find a cheapest path from ``init`` to ``goal`` and draw it with arrows.

    The open list is processed in order of increasing g-value. For every
    cell that gets discovered, the index of the move that led to it is
    remembered; once the goal is taken off the open list those moves are
    undone one by one, starting at the goal, to recover the path.

    Args:
        grid: 2D list; 0 marks a navigable cell, 1 an occupied one.
        init: [row, col] of the start cell.
        goal: [row, col] of the target cell.
        cost: cost of a single move.

    Returns:
        A tuple ``(policy, [g, row, col])``. ``policy`` has the shape of
        ``grid``; cells on the path hold the symbol of the move taken from
        them, the goal holds '*', every other cell holds ' '. The list holds
        the path cost and the goal coordinates. Returns the string 'fail'
        when the open list runs empty before the goal is reached.

    Reads the module-level ``delta`` and ``delta_name`` lists.
    """
    import heapq

    n_rows = len(grid)
    n_cols = len(grid[0])

    # The picture of the path; the goal is marked with a star.
    policy = [[' '] * n_cols for _ in range(n_rows)]
    policy[goal[0]][goal[1]] = "*"

    # Index into delta of the move that discovered each cell (-1 = none).
    actions = [[-1] * n_cols for _ in range(n_rows)]

    # 1 once a cell has been put on the open list.
    closed = [[0] * n_cols for _ in range(n_rows)]
    closed[init[0]][init[1]] = 1

    # Min-heap of [g, row, col]. Popping the heap yields the same entry the
    # original sort / reverse / pop sequence did: the lexicographically
    # smallest one.
    frontier = [[0, init[0], init[1]]]

    while True:
        if not frontier:
            return 'fail'

        g, x, y = heapq.heappop(frontier)
        if x == goal[0] and y == goal[1]:
            break

        for i in range(len(delta)):
            x2 = x + delta[i][0]
            y2 = y + delta[i][1]
            if 0 <= x2 < n_rows and 0 <= y2 < n_cols:
                if closed[x2][y2] == 0 and grid[x2][y2] == 0:
                    heapq.heappush(frontier, [g + cost, x2, y2])
                    closed[x2][y2] = 1
                    actions[x2][y2] = i

    # Walk back from the goal. A move is undone by subtracting its offset,
    # which works for any ordering of delta. Coordinates are compared one
    # by one so that init may be a list or a tuple.
    row, col = goal[0], goal[1]
    while row != init[0] or col != init[1]:
        step = actions[row][col]  # move that was taken to get into this cell
        row -= delta[step][0]
        col -= delta[step][1]
        policy[row][col] = delta_name[step]

    return (policy, [g, x, y])

# Print the (policy, [cost, row, col]) result for the example grid above.
print(search(grid,init,goal,cost))

([['>', 'v', ' ', ' ', ' ', ' '], [' ', '>', '>', '>', '>', 'v'], [' ', ' ', ' ', ' ', ' ', 'v'], [' ', ' ', ' ', ' ', ' ', 'v'], [' ', ' ', ' ', ' ', ' ', '*']], [9, 4, 5])


<H2>A* search algorithm.</H2>

In [12]:
# -----------
# User Instructions:
#
# Modify the search function so that it becomes
# an A* search algorithm as defined in the previous
# lectures.
#
# Your function should return the expanded grid
# which shows, for each element, the count when
# it was expanded or -1 if the element was never expanded.
# 
# If there is no path from init to goal,
# the function should return the string 'fail'
# ----------

# Example world for the A* quiz: 0 = free, 1 = occupied.
grid = [[0, 0, 0, 0, 0, 0],
        [0, 1, 1, 1, 1, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 1, 0]]
# Heuristic h(row, col), same shape as grid. Each entry equals the number of
# steps to the bottom-right cell when obstacles are ignored.
heuristic = [[9, 8, 7, 6, 5, 4],
             [8, 7, 6, 5, 4, 3],
             [7, 6, 5, 4, 3, 2],
             [6, 5, 4, 3, 2, 1],
             [5, 4, 3, 2, 1, 0]]

# Start at the top-left cell and aim for the bottom-right cell ([row, col]).
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1]
# Cost of a single move.
cost = 1

# Possible moves as [row offset, col offset]; read as a global by search().
delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

# Display symbol for each move, in the same order as delta.
delta_name = ['^', '<', 'v', '>']

def search(grid,init,goal,cost,heuristic):
    """A* search that reports the order in which cells were expanded.

    Open-list entries are ``[f, g, h, row, col]`` with ``f = g + h``; the
    entry with the smallest f (ties broken by the remaining fields) is
    expanded next.

    Args:
        grid: 2D list; 0 marks a navigable cell, 1 an occupied one.
        init: [row, col] of the start cell.
        goal: [row, col] of the target cell.
        cost: cost of a single move.
        heuristic: 2D list with the same shape as ``grid`` giving h for
            every cell.

    Returns:
        A grid with the shape of ``grid`` holding, for each cell, the count
        at which it was expanded (start = 0) or -1 if it never was. Returns
        the string 'fail' when there is no path from init to goal.

    Reads the module-level ``delta`` list of [row offset, col offset] moves.
    """
    import heapq

    n_rows = len(grid)
    n_cols = len(grid[0])

    # 1 once a cell has been put on the open list.
    closed = [[0] * n_cols for _ in range(n_rows)]
    closed[init[0]][init[1]] = 1

    expand = [[-1] * n_cols for _ in range(n_rows)]

    # Min-heap of [f, g, h, row, col]. The start entry is alone on the heap
    # when it is popped, so its f and h values do not influence the order.
    frontier = [[0, 0, 0, init[0], init[1]]]
    count = 0

    while frontier:
        # Only g and the coordinates are needed after the pop.
        _, g, _, x, y = heapq.heappop(frontier)
        expand[x][y] = count
        count += 1

        if x == goal[0] and y == goal[1]:
            return expand

        for move in delta:
            x2 = x + move[0]
            y2 = y + move[1]
            # Stay inside the world and only consider free, undiscovered cells.
            if 0 <= x2 < n_rows and 0 <= y2 < n_cols:
                if closed[x2][y2] == 0 and grid[x2][y2] == 0:
                    g2 = g + cost
                    h2 = heuristic[x2][y2]
                    heapq.heappush(frontier, [g2 + h2, g2, h2, x2, y2])
                    closed[x2][y2] = 1

    # Open list exhausted: the goal cannot be reached. The instructions for
    # this quiz ask for the lowercase string.
    return 'fail'

# Print the A* expansion table for the example grid and heuristic above.
print(search(grid,init,goal,cost,heuristic))

[[0, 1, 3, 5, 7, 9], [2, -1, -1, -1, -1, 10], [4, -1, -1, -1, -1, 11], [6, -1, -1, -1, -1, 12], [8, -1, -1, -1, -1, 13]]


<H2>Dynamic Programming</H2>

In [64]:
# ----------
# User Instructions:
# 
# Create a function compute_value which returns
# a grid of values. The value of a cell is the minimum
# number of moves required to get from the cell to the goal. 
#
# If a cell is a wall or it is impossible to reach the goal from a cell,
# assign that cell a value of 99.
# ----------

# Three alternative test worlds (0 = free, 1 = occupied). Each assignment
# overwrites the previous one, so only the LAST grid below is actually used
# by the rest of this cell.
grid = [[0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 0],
        [0, 0, 1, 1, 1, 0],
        [0, 0, 0, 0, 1, 0]]

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]

# Active grid: column 3 is a solid wall, so the left part cannot reach the goal.
grid = [[0, 1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0]]

# Goal is the bottom-right cell of the active grid, given as [row, col].
goal = [len(grid)-1, len(grid[0])-1]
cost = 1 # the cost associated with moving from a cell to an adjacent one

# Possible moves as [row offset, col offset]; read as a global by compute_value_real().
delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

# Display symbol for each move, in the same order as delta.
delta_name = ['^', '<', 'v', '>']

def compute_value_real(grid,goal,cost,visited=None):
    """Compute, for every cell, the cheapest cost of travelling to ``goal``.

    The grid is flooded breadth-first starting at the goal, so each cell is
    assigned its value exactly once and no recursion is involved.

    Args:
        grid: 2D list; 0 marks a navigable cell, 1 (or 99) a wall. The
            caller's grid is NOT modified; a new grid is returned.
        goal: [row, col] of the goal cell.
        cost: cost of one move between adjacent cells.
        visited: retained only so existing calls that pass ``visited=[]``
            keep working; the value is ignored.

    Returns:
        A new grid with the shape of ``grid`` in which
        * walls hold 99,
        * the goal holds 0,
        * every cell that can reach the goal holds moves-to-goal * cost,
        * navigable cells that cannot reach the goal hold -1.
        If the goal lies outside the grid or on a wall, no cell receives a
        value (free cells stay at -1).

    Reads the module-level ``delta`` list of [row offset, col offset] moves.
    """
    from collections import deque

    if not grid:
        return []

    n_rows = len(grid)
    n_cols = len(grid[0])

    # Build the result on a copy: walls become 99 and free cells start as -1.
    # Writing these markers into the caller's grid would make a second call
    # misread a computed value of 1 as a wall.
    value = [[99 if cell == 1 else (-1 if cell == 0 else cell) for cell in row]
             for row in grid]

    goal_row, goal_col = goal[0], goal[1]

    # A goal outside the world or on a wall reaches nothing.
    if not (0 <= goal_row < n_rows and 0 <= goal_col < n_cols):
        return value
    if value[goal_row][goal_col] == 99:
        return value

    reached = [[False] * n_cols for _ in range(n_rows)]
    reached[goal_row][goal_col] = True
    value[goal_row][goal_col] = 0

    queue = deque([(goal_row, goal_col)])
    while queue:
        row, col = queue.popleft()
        for move in delta:
            row2 = row + move[0]
            col2 = col + move[1]
            if not (0 <= row2 < n_rows and 0 <= col2 < n_cols):
                continue
            if reached[row2][col2] or value[row2][col2] == 99:
                continue
            # First visit in a breadth-first flood is along a shortest route.
            reached[row2][col2] = True
            value[row2][col2] = value[row][col] + cost
            queue.append((row2, col2))

    return value
    # make sure your function returns a grid of values as 
    # demonstrated in the previous video.
    #return value 

# AE: I have a slight disagreement with Udacity's grader. I think that the cells, that are
# AE: unreachable, should be marked differently to the occupied borders, but Udacity's
# AE: grader requires that unreachable cells are marked just like borders. Purely for
# AE: that reason, I have this construct of two functions: compute_value(grid,goal,cost)
# AE: and compute_value_real(grid,goal,cost,visited=[]). The real work is done in the
# AE: compute_value_real. This compute_value function only translates the output to what
# AE: Udacity's grader wants.
def compute_value(grid,goal,cost):
    """Return the value grid in the form the grader expects.

    Delegates the actual work to compute_value_real and then rewrites every
    -1 entry of its result to 99, in place, before returning that same grid.
    """
    values = compute_value_real(grid, goal, cost, visited=[])

    # Turn the -1 markers left in the result into 99.
    for line in values:
        for idx in range(len(values[0])):
            if line[idx] == -1:
                line[idx] = 99

    return values
    
def show_floats(p):
    """Print a 2D grid of numbers, one row per line, with five decimals each."""
    lines = []
    for r in p:
        cells = ['{0:.5f}'.format(x) for x in r]
        lines.append('[' + ','.join(cells) + ']')
    print('[' + ',\n '.join(lines) + ']')

def show(p):
    """Print a 2D grid, one row per line, formatting every entry with str.format."""
    lines = []
    for r in p:
        cells = ['{0}'.format(x) for x in r]
        lines.append('[' + ','.join(cells) + ']')
    print('[' + ',\n '.join(lines) + ']')
    
# show() prints the grid itself and returns None; wrapping it in print()
# added a stray "None" line to the cell output.
show(compute_value(grid,goal,cost))

[[99,99,99,99,5,4],
 [99,99,99,99,4,3],
 [99,99,99,99,3,2],
 [99,99,99,99,2,1],
 [99,99,99,99,1,0]]
None


<H3>Adding Sebastian Thrun's solution and getting ready for printing out the optimal path with dynamic programming.</H3>

In [82]:
# ----------
# User Instructions:
# 
# Write a function optimum_policy that returns
# a grid which shows the optimum policy for robot
# motion. This means there should be an optimum
# direction associated with each navigable cell from
# which the goal can be reached.
# 
# Unnavigable cells as well as cells from which 
# the goal cannot be reached should have a string 
# containing a single space (' '), as shown in the 
# previous video. The goal cell should have '*'.
# ----------

# Three alternative test worlds (0 = free, 1 = occupied). Each assignment
# overwrites the previous one, so only the LAST grid below is actually used
# by the rest of this cell.
grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 1, 0, 1, 0],
        [0, 0, 0, 0, 1, 0],
        [0, 1, 1, 1, 1, 0],
        [0, 1, 0, 1, 1, 0]]

# Active grid: the goal's only free neighbour is the cell to its left.
grid = [[0, 1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0, 0],
        [0, 1, 0, 1, 0, 0],
        [0, 1, 0, 1, 1, 1],
        [0, 0, 0, 1, 0, 0]]

# Start cell and goal cell, both given as [row, col].
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1]
cost = 1 # the cost associated with moving from a cell to an adjacent one

# Possible moves as [row offset, col offset]; read as a global by optimum_policy().
delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

# Display symbol for each move, in the same order as delta.
delta_name = ['^', '<', 'v', '>']

def optimum_policy(grid,goal,cost):
    """Return the best move towards ``goal`` for every cell of the grid.

    The value of each cell (cheapest cost to the goal) is found by sweeping
    the grid repeatedly until no value improves any more. Whenever a cell's
    value improves, the move that produced the improvement is recorded as
    that cell's policy.

    Args:
        grid: 2D list; 0 marks a navigable cell, 1 an occupied one.
        goal: [row, col] of the goal cell.
        cost: cost of one move between adjacent cells.

    Returns:
        A grid with the shape of ``grid``: the goal holds '*', every
        navigable cell from which the goal can be reached holds the symbol
        of its best move, and all remaining cells hold ' '.

    Reads the module-level ``delta`` and ``delta_name`` lists. It depends on
    nothing else from the surrounding cell; in particular no start cell is
    needed because a policy is produced for every cell.
    """
    n_rows = len(grid)
    n_cols = len(grid[0])

    # "Not reached yet" is infinity, so arbitrarily long paths are handled.
    unreached = float('inf')
    value = [[unreached] * n_cols for _ in range(n_rows)]
    policy = [[' '] * n_cols for _ in range(n_rows)]

    change = True
    while change:
        change = False

        for x in range(n_rows):
            for y in range(n_cols):
                if goal[0] == x and goal[1] == y:
                    if value[x][y] > 0:
                        value[x][y] = 0
                        change = True

                elif grid[x][y] == 0:
                    for a in range(len(delta)):
                        x2 = x + delta[a][0]
                        y2 = y + delta[a][1]

                        if 0 <= x2 < n_rows and 0 <= y2 < n_cols and grid[x2][y2] == 0:
                            v2 = value[x2][y2] + cost

                            if v2 < value[x][y]:
                                # A cheaper way out of this cell was found:
                                # remember both its value and the move used.
                                change = True
                                value[x][y] = v2
                                policy[x][y] = delta_name[a]

    # The goal cell is marked with an asterisk.
    policy[goal[0]][goal[1]] = "*"

    return policy

def show(p):
    """Print a 2D grid of strings, one bracketed row per line."""
    body = ',\n '.join('[' + ','.join(r) + ']' for r in p)
    print('[' + body + ']')
    
# show() prints the grid itself and returns None; wrapping it in print()
# added a stray "None" line to the cell output.
show(optimum_policy(grid,goal,cost))

[[ , , , , , ],
 [ , , , , , ],
 [ , , , , , ],
 [ , , , , , ],
 [ , , , ,>,*]]
None


<H2>Starting final quiz - Navigating a car in a discrete world.</H2>

In [None]:
# ----------
# User Instructions:
# 
# Implement the function optimum_policy2D below.
#
# You are given a car in grid with initial state
# init. Your task is to compute and return the car's 
# optimal path to the position specified in goal; 
# the costs for each motion are as defined in cost.
#
# There are four motion directions: up, left, down, and right.
# Increasing the index in this array corresponds to making a
# left turn, and decreasing the index corresponds to making a
# right turn.

# Displacement [row offset, col offset] of one step forward for each heading.
# The list index is the heading number used in init below.
forward = [[-1,  0], # go up
           [ 0, -1], # go left
           [ 1,  0], # go down
           [ 0,  1]] # go right
# Readable name for each heading, in the same order as forward.
forward_name = ['up', 'left', 'down', 'right']

# action has 3 values: right turn, no turn, left turn
# (each value is the change applied to the heading index)
action = [-1, 0, 1]
# Symbol shown in the policy grid for each action, in the same order as action.
action_name = ['R', '#', 'L']

# EXAMPLE INPUTS:
# grid format:
#     0 = navigable space
#     1 = unnavigable space 
grid = [[1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0],
        [1, 1, 1, 0, 1, 1],
        [1, 1, 1, 0, 1, 1]]

init = [4, 3, 0] # given in the form [row,col,direction]
                 # direction = 0: up
                 #             1: left
                 #             2: down
                 #             3: right
                
goal = [2, 0] # given in the form [row,col]

cost = [2, 1, 20] # cost has 3 values, corresponding to making 
                  # a right turn, no turn, and a left turn
                  # a right turn, no turn, and a left turn

# EXAMPLE OUTPUT:
# calling optimum_policy2D with the given parameters should return 
# [[' ', ' ', ' ', 'R', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', '#'],
#  ['*', '#', '#', '#', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', ' '],
#  [' ', ' ', ' ', '#', ' ', ' ']]
# ----------

# ----------------------------------------
# modify code below
# ----------------------------------------

def optimum_policy2D(grid,init,goal,cost):
    """Compute the car's cheapest route to ``goal`` and draw it on a grid.

    The car's state is (heading, row, col). An action first changes the
    heading (right turn, no turn or left turn) and then moves the car one
    cell forward along the new heading. The cheapest cost-to-goal of every
    state is found by sweeping all states until no value improves, and the
    action that produced each improvement is recorded. The route is then
    read off by following the recorded actions from ``init``.

    Args:
        grid: 2D list; 0 marks a navigable cell, 1 an unnavigable one.
        init: [row, col, heading] of the car at the start; heading is an
            index into ``forward``.
        goal: [row, col] of the goal cell (any heading is accepted there).
        cost: list with one cost per entry of ``action``.

    Returns:
        A grid with the shape of ``grid``. Cells on the route hold the
        symbol of the action taken there, the goal holds '*', all other
        cells hold ' '. A cell crossed more than once shows the action of
        the last crossing. If the goal cannot be reached from ``init`` the
        grid contains only ' '.

    Reads the module-level ``forward``, ``action`` and ``action_name`` lists.
    """
    n_rows = len(grid)
    n_cols = len(grid[0])
    n_headings = len(forward)
    unreached = float('inf')

    # value[heading][row][col]  : cheapest known cost to the goal
    # policy[heading][row][col] : symbol of the action achieving that cost
    value = [[[unreached] * n_cols for _ in range(n_rows)]
             for _ in range(n_headings)]
    policy = [[[' '] * n_cols for _ in range(n_rows)]
              for _ in range(n_headings)]

    change = True
    while change:
        change = False

        for row in range(n_rows):
            for col in range(n_cols):
                for heading in range(n_headings):
                    if row == goal[0] and col == goal[1]:
                        # The goal costs nothing, whatever the heading.
                        if value[heading][row][col] > 0:
                            value[heading][row][col] = 0
                            policy[heading][row][col] = '*'
                            change = True

                    elif grid[row][col] == 0:
                        for i in range(len(action)):
                            # Turn first, then drive one cell forward.
                            heading2 = (heading + action[i]) % n_headings
                            row2 = row + forward[heading2][0]
                            col2 = col + forward[heading2][1]

                            if (0 <= row2 < n_rows and 0 <= col2 < n_cols
                                    and grid[row2][col2] == 0):
                                v2 = value[heading2][row2][col2] + cost[i]
                                if v2 < value[heading][row][col]:
                                    value[heading][row][col] = v2
                                    policy[heading][row][col] = action_name[i]
                                    change = True

    # Follow the recorded actions from the start state and copy them onto a
    # flat grid.
    policy2D = [[' '] * n_cols for _ in range(n_rows)]
    row, col, heading = init[0], init[1], init[2]
    policy2D[row][col] = policy[heading][row][col]

    # '*' means the goal was reached; ' ' means this state has no route to
    # the goal, in which case the walk stops as well.
    while policy[heading][row][col] not in ('*', ' '):
        turn = action[action_name.index(policy[heading][row][col])]
        heading = (heading + turn) % n_headings
        row += forward[heading][0]
        col += forward[heading][1]
        policy2D[row][col] = policy[heading][row][col]

    return policy2D
