In [1]:

delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

delta_name = ['^', '<', 'v', '>'] # Use these when creating your policy grid.

# ---------------------------------------------
#  Modify the function stochastic_value below
# ---------------------------------------------

def _landing_value(value, row, col, move, collision_cost):
    """Return the value of the cell reached from (row, col) via move.

    If the move leaves the grid, collision_cost is returned instead.
    """
    new_row = row + move[0]
    new_col = col + move[1]
    if 0 <= new_row < len(value) and 0 <= new_col < len(value[0]):
        return value[new_row][new_col]
    return collision_cost


def stochastic_value(grid, goal, cost_step, collision_cost, success_prob):
    """Compute expected cost-to-goal values and a policy under stochastic motion.

    An action moves in its intended direction with probability
    ``success_prob``; the remaining probability is split evenly between the
    moves stored just after and just before it in ``delta`` (a left turn and
    a right turn for the four-move table above).  Leaving the grid costs
    ``collision_cost``.  Non-goal cells whose grid entry is not 0 are never
    updated, so they keep their initial value ``collision_cost`` and moving
    into them is penalised the same way.

    Values are updated in place during each sweep, and sweeps repeat until
    no cell's value decreases any further.

    Args:
        grid: rectangular list of rows; 0 marks a free cell.  Every row is
            assumed to have the length of the first row.
        goal: ``[row, col]`` of the goal cell.
        cost_step: cost added for every action taken.
        collision_cost: cost of leaving the grid; also the initial value of
            every cell.
        success_prob: probability that an action moves as intended.

    Returns:
        A ``(value, policy)`` tuple of grids with the same shape as ``grid``.
        ``value`` holds the expected cost for each cell; ``policy`` holds the
        ``delta_name`` symbol of the chosen action, ``'*'`` at the goal and
        ``' '`` where no action improved on the initial value.  An empty
        ``grid`` yields ``([], [])``.
    """
    if not grid:
        # Nothing to evaluate; avoids indexing grid[0] on an empty grid.
        return [], []

    n_rows = len(grid)
    n_cols = len(grid[0])
    n_moves = len(delta)
    # Probability of veering to either side of the intended direction.
    failure_prob = (1.0 - success_prob) / 2.0

    value = [[collision_cost for _ in range(n_cols)] for _ in range(n_rows)]
    policy = [[' ' for _ in range(n_cols)] for _ in range(n_rows)]

    change = True
    while change:
        change = False

        for row in range(n_rows):
            for col in range(n_cols):
                if row == goal[0] and col == goal[1]:
                    if value[row][col] > 0:
                        change = True
                        value[row][col] = 0
                        policy[row][col] = '*'
                elif grid[row][col] == 0:
                    for a in range(n_moves):
                        intended = success_prob * _landing_value(
                            value, row, col, delta[a], collision_cost)
                        veer_left = failure_prob * _landing_value(
                            value, row, col, delta[(a + 1) % n_moves],
                            collision_cost)
                        veer_right = failure_prob * _landing_value(
                            value, row, col, delta[(a - 1) % n_moves],
                            collision_cost)

                        # Summation order is kept fixed so results are
                        # reproducible down to floating-point rounding.
                        cost_sum = intended + veer_left + veer_right + cost_step
                        if cost_sum < value[row][col]:
                            change = True
                            value[row][col] = cost_sum
                            policy[row][col] = delta_name[a]

    return value, policy

# ---------------------------------------------
#  Use the code below to test your solution
# ---------------------------------------------

# Sample world: 4x4 grid, 0 = free cell, 1 = obstacle.
grid = [
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 1, 1, 0],
]
goal = [0, len(grid[0]) - 1]  # Goal is in top right corner
cost_step = 1
collision_cost = 1000  # penalty for hitting a wall
success_prob = 0.5

value, policy = stochastic_value(
    grid, goal, cost_step, collision_cost, success_prob
)
# Print every row of the value grid, then every row of the policy grid.
for row in value + policy:
    print(row)

# Expected outputs:
#
#[471.9397246855924, 274.85364957758316, 161.5599867065471, 0],
#[334.05159958720344, 230.9574434590965, 183.69314862430264, 176.69517762501977], 
#[398.3517867450282, 277.5898270101976, 246.09263437756917, 335.3944132514738], 
#[700.1758933725141, 1000, 1000, 668.697206625737]


#
# ['>', 'v', 'v', '*']
# ['>', '>', '^', '<']
# ['>', '^', '^', '<']
# ['^', ' ', ' ', '^']


[471.9397246855924, 274.85364957758316, 161.5599867065471, 0]
[334.05159958720344, 230.9574434590965, 183.69314862430264, 176.69517762501977]
[398.3517867450282, 277.5898270101976, 246.09263437756917, 335.3944132514738]
[700.1758933725141, 1000, 1000, 668.697206625737]
['>', 'v', 'v', '*']
['>', '>', '^', '<']
['>', '^', '^', '<']
['^', ' ', ' ', '^']
