# Welcome to the Grid

In [1]:
# Rectangular grid world used by the policy experiments in this notebook.
# A state is an [a, b] pair with a in 1:width and b in 1:height. The functions
# in this file use the first coordinate as the row index of the matrices they
# build, so `width` is the number of rows shown when a grid is displayed.
type gridworld
  width::Int  # extent of the first coordinate
  height::Int  # extent of the second coordinate
  blockedStates::Array{Array{Int, 1}, 1}  # cells removed from the state set (displayed as "*" / "Blocked")
  exitStates::Array{Array{Int, 1}, 1}  # cells the policies treat as absorbing
end

# List every free cell of `g` as an [a, b] pair. The first coordinate varies
# slowest ([1,1], [1,2], ..., [2,1], ...); blocked cells are left out.
function states(g::gridworld)
  allCells = [[a, b] for a in 1:g.width for b in 1:g.height]
  isFree(cell) = !(cell in g.blockedStates)
  return filter(isFree, allCells)
end

# Print the grid as text, one line per value of the first coordinate. Free
# cells show their "a,b" coordinates and blocked cells show "*". A legend with
# the blocked marker and the exit states follows. Returns the label matrix.
function printGrid(g::gridworld)
  freeCells = states(g)
  labels = Array(Any, g.width, g.height)
  for s in freeCells
    labels[s[1], s[2]] = "$(s[1]),$(s[2])"
  end
  for b in g.blockedStates
    labels[b[1], b[2]] = "*"
  end
  nrows, ncols = size(labels)
  for r in 1:nrows
    for c in 1:ncols
      # label left-justified in a 4-character field, followed by a 3-space gap
      @printf("%-4s   ", labels[r, c])
    end
    println()
  end
  println("Blocked: *")
  exits = join(["[$(x[1]),$(x[2])]" for x in g.exitStates], ", ")
  println("Exit states: $(exits)")
  return labels
end
;

In [3]:
# 4x4 grid with blocked cells [2,2] and [3,3] and a single exit at [3,1]
g = gridworld(4,4, [[2,2], [3,3]], [[3, 1]])
g.blockedStates
g.exitStates
# free cells only: 16 cells minus the 2 blocked ones
gridStates = states(g)
printGrid(g)
;

1,1    1,2    1,3    1,4    
2,1    *      2,3    2,4    
3,1    3,2    *      3,4    
4,1    4,2    4,3    4,4    
Blocked: *
Exit states: [3,1]


In [6]:
# Uniform random policy over 4-connected moves.
# Returns (policy, actions): `actions` is states(g) and `policy[k]` is the
# probability of moving from `pos` to `actions[k]`. An exit state keeps all
# probability on itself; any other state spreads it evenly over those of its
# up/down/left/right neighbours that are free cells of the grid.
function randomPolicy(pos::Array{Int, 1}, g::gridworld)
  actions = states(g)
  policy = zeros(length(actions))
  i, j = pos
  if pos in g.exitStates
    # absorbing case: the only transition is back onto the exit itself
    for (k, s) in enumerate(actions)
      if s == pos
        policy[k] = 1
      end
    end
    return policy, actions
  end
  # non-absorbing case
  candidates = [[i-1,j], [i+1,j], [i,j-1], [i,j+1]]
  # drop moves that leave the grid or land on a blocked cell
  targets = filter(candidates) do c
    c in actions
  end
  share = 1/length(targets)
  for (k, s) in enumerate(actions)
    if s in targets
      policy[k] = share
    end
  end
  return policy, actions
end
;

In [8]:
# corner cell [1,1]: probability split between its free neighbours [1,2] and [2,1]
print(randomPolicy([1,1], g))

([0.0,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],Array{Int64,1}[[1,1],[1,2],[1,3],[1,4],[2,1],[2,3],[2,4],[3,1],[3,2],[3,4],[4,1],[4,2],[4,3],[4,4]])

In [9]:
# corner cell [1,4]: probability split between its free neighbours [1,3] and [2,4]
print(randomPolicy([1,4], g))


([0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0],Array{Int64,1}[[1,1],[1,2],[1,3],[1,4],[2,1],[2,3],[2,4],[3,1],[3,2],[3,4],[4,1],[4,2],[4,3],[4,4]])

In [10]:
# edge cell [2,1]: its neighbour [2,2] is blocked, so only [1,1] and [3,1] get probability
print(randomPolicy([2,1], g))


([0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0],Array{Int64,1}[[1,1],[1,2],[1,3],[1,4],[2,1],[2,3],[2,4],[3,1],[3,2],[3,4],[4,1],[4,2],[4,3],[4,4]])

In [11]:
# exit cell [3,1]: absorbing, all probability stays on the cell itself
print(randomPolicy([3,1], g))

([0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0],Array{Int64,1}[[1,1],[1,2],[1,3],[1,4],[2,1],[2,3],[2,4],[3,1],[3,2],[3,4],[4,1],[4,2],[4,3],[4,4]])

In [13]:

# Iterative policy evaluation on grid `g`.
#
# `policy(state, g)` must return a tuple whose first element is the vector of
# transition probabilities from `state` to each entry of states(g) (this is
# what randomPolicy returns). Every step taken from a non-exit state is
# rewarded -1 and exit states are rewarded 0.
#
# Keywords:
#   niter - number of sweeps of the update v <- r + gamma * P * v. The count is
#           fixed; there is no convergence test, so pick it large enough.
#   gamma - discount factor in [0, 1] applied to the successor values. The
#           default of 1 gives the undiscounted evaluation.
#
# Returns the vector of state values, ordered like states(g).
# Throws ArgumentError when gamma is outside [0, 1].
function policyEval(policy::Function, g::gridworld; niter = 50, gamma = 1.0)
  0 <= gamma <= 1 || throw(ArgumentError("gamma must be in [0, 1], got $gamma"))
  gridStates = states(g)
  n = length(gridStates)
  # Row k holds the transition probabilities out of gridStates[k]. Filling a
  # preallocated matrix also copes with an empty state list.
  transMat = zeros(n, n)
  for (k, s) in enumerate(gridStates)
    transMat[k, :] = policy(s, g)[1]
  end
  rewardVect = Float64[(s in g.exitStates) ? 0 : -1 for s in gridStates]
  v = zeros(n)
  for sweep in 1:niter
    v = rewardVect + gamma * (transMat * v)
  end
  return v
end

# state values of the 4-neighbour random policy on the current grid, using 1000 sweeps
val = policyEval(randomPolicy, g, niter = 1000)

# Arrange the state values `val` (ordered like states(g)) in a width x height
# matrix, marking blocked cells with "*". Also prints the legend lines for the
# blocked marker and the exit states. Returns the matrix.
function valueGrid(val::Array{Float64, 1}, g::gridworld)
  freeCells = states(g)
  grid = Array(Any, g.width, g.height)
  for k in 1:length(freeCells)
    cell = freeCells[k]
    grid[cell[1], cell[2]] = val[k]
  end
  for b in g.blockedStates
    grid[b[1], b[2]] = "*"
  end
  println("Blocked: *")
  exits = join(["[$(x[1]),$(x[2])]" for x in g.exitStates], ", ")
  println("Exit states: $(exits)")
  return grid
end
;



In [14]:
# show the values laid out on the grid (also prints the legend lines)
valueGrid(val, g)


Blocked: *
Exit states: [3,1]


4×4 Array{Any,2}:
 -23.2  -31.8   -38.4   -40.2
 -12.6     "*"  -40.2   -40.0
   0.0   -8.2      "*"  -36.6
  -8.2  -14.4   -23.8   -31.2

In [36]:

# Build a matrix showing, for every free cell, the greedy move(s) with respect
# to the state values `val` (ordered like states(g)).
#
# Each cell holds the arrows of the 4-connected neighbours whose value, rounded
# to 2 decimals, is the highest; ties produce several arrows. Blocked cells
# hold "Blocked" and exit cells hold "Exit". A free cell with no reachable
# neighbour holds "" rather than arrows pointing at walls or blocked cells.
#
# Calls valueGrid, so the legend lines it prints appear as a side effect.
function greedyArrowGrid(val::Array{Float64, 1}, g::gridworld)
  # Offset of each move and the arrow drawn for it. The first coordinate is the
  # row of the displayed matrix, so -1 on it is "up"; -1 on the second is "left".
  moves = [([-1, 0], "↑"), ([1, 0], "↓"), ([0, -1], "←"), ([0, 1], "→")]
  valGrid = valueGrid(val, g)
  arrowGrid = Array(Any, g.width, g.height)
  gridStates = states(g)
  for s in gridStates
    i, j = s
    # exit cells get their "Exit" label in the loop at the end
    if s in g.exitStates
      arrowGrid[i,j] = ""
      continue
    end
    # only neighbours that are free cells inside the grid are candidates
    reachable = filter(m -> (s + m[1]) in gridStates, moves)
    if isempty(reachable)
      arrowGrid[i,j] = ""
      continue
    end
    nbsVals = [round(valGrid[(s + m[1])...], 2) for m in reachable]
    maxVal = maximum(nbsVals)
    best = find(v -> v == maxVal, nbsVals)
    arrowGrid[i,j] = join([m[2] for m in reachable[best]])
  end
  for x in g.blockedStates
    arrowGrid[x...] = "Blocked"
  end
  for x in g.exitStates
    arrowGrid[x...] = "Exit"
  end
  return arrowGrid
end
;



In [37]:

# 4x4 grid with two blocked cells and the exit at [3,1]
g = gridworld(4,4, [[2,2], [3,3]], [[3, 1]])
val = policyEval(randomPolicy, g, niter = 1000)
# greedy moves with respect to the random-policy values
greedyArrowGrid(val, g)


Blocked: *
Exit states: [3,1]


4×4 Array{Any,2}:
 "↓"     "←"        "←"        "←"
 "↓"     "Blocked"  "↑"        "↓"
 "Exit"  "←"        "Blocked"  "↓"
 "↑"     "↑←"       "←"        "←"

In [40]:
# open 6x6 grid (no blocked cells) with the exit in the corner [6,6]
g = gridworld(6,6, [], [[6,6]])
# no niter given, so policyEval runs its default number of sweeps
val = policyEval(randomPolicy, g)
greedyArrowGrid(val, g)


Blocked: *
Exit states: [6,6]


6×6 Array{Any,2}:
 "↓→"  "→"   "→"   "→"   "↓"   "↓"   
 "↓"   "↓→"  "↓"   "↓"   "↓"   "↓"   
 "↓"   "→"   "↓→"  "↓"   "↓"   "↓"   
 "↓"   "→"   "→"   "↓→"  "↓"   "↓"   
 "→"   "→"   "→"   "→"   "↓→"  "↓"   
 "→"   "→"   "→"   "→"   "→"   "Exit"

In [41]:
# 9x11 grid with three blocked cells and three exits
g = gridworld(9,11, [[2,2], [3,3], [6,3]], [[3, 1], [6,2], [1,11]])
# no niter given, so policyEval runs its default number of sweeps
val = policyEval(randomPolicy, g)
greedyArrowGrid(val, g)

Blocked: *
Exit states: [3,1], [6,2], [1,11]


9×11 Array{Any,2}:
 "↓"     "←"        "←"        "←"  "←"  "→"  "→"  "→"  "→"  "→"  "Exit"
 "↓"     "Blocked"  "↑"        "←"  "←"  "→"  "→"  "→"  "→"  "↑"  "↑"   
 "Exit"  "←"        "Blocked"  "↓"  "←"  "←"  "→"  "→"  "↑"  "↑"  "↑"   
 "↑"     "↑"        "←"        "←"  "←"  "←"  "↑"  "↑"  "↑"  "↑"  "↑"   
 "↑"     "↓"        "←"        "←"  "←"  "←"  "←"  "↑"  "↑"  "↑"  "↑"   
 "→"     "Exit"     "Blocked"  "↑"  "←"  "←"  "←"  "↑"  "↑"  "↑"  "↑"   
 "↑"     "↑"        "←"        "←"  "←"  "←"  "←"  "↑"  "↑"  "↑"  "↑"   
 "↑"     "↑"        "←"        "←"  "←"  "←"  "←"  "←"  "↑"  "↑"  "↑"   
 "↑"     "↑"        "←"        "←"  "←"  "←"  "←"  "←"  "←"  "↑"  "↑"   

In [42]:
# same 4x4 layout as before, but with the exit moved to the corner [4,4]
g = gridworld(4,4, [[2,2], [3,3]], [[4,4]])
# no niter given, so policyEval runs its default number of sweeps
val = policyEval(randomPolicy, g)
greedyArrowGrid(val, g)

Blocked: *
Exit states: [4,4]


4×4 Array{Any,2}:
 "↓→"  "→"        "↓→"       "↓"   
 "↓"   "Blocked"  "→"        "↓"   
 "↓→"  "↓"        "Blocked"  "↓"   
 "→"   "→"        "→"        "Exit"

In [43]:

# Uniform random policy over 8-connected moves (straight and diagonal).
# Returns (policy, actions): `actions` is states(g) and `policy[k]` is the
# probability of moving from `pos` to `actions[k]`. An exit state keeps all
# probability on itself; any other state spreads it evenly over those of its
# eight surrounding cells that are free cells of the grid.
function randomPolicy2(pos::Array{Int, 1}, g::gridworld)
  actions = states(g)
  policy = zeros(length(actions))
  i, j = pos
  if pos in g.exitStates
    # absorbing case: the only transition is back onto the exit itself
    policy[find(s -> s == pos, actions)] = 1
    return policy, actions
  end
  # non-absorbing case: four straight moves followed by the four diagonals
  ring = [[i-1,j], [i+1,j], [i,j-1], [i,j+1],
          [i-1,j-1], [i+1,j-1], [i+1,j+1], [i-1,j+1]]
  # drop moves that leave the grid or land on a blocked cell
  targets = filter(c -> c in actions, ring)
  hits = find(s -> s in targets, actions)
  policy[hits] = 1/length(targets)
  return policy, actions
end



# Build a matrix showing, for every free cell, the greedy move(s) with respect
# to the state values `val` (ordered like states(g)), allowing diagonal moves.
#
# Each cell holds the arrows of the 8-connected neighbours whose value, rounded
# to 2 decimals, is the highest; ties produce several arrows. Blocked cells
# hold "Blocked" and exit cells hold "Exit". A free cell with no reachable
# neighbour holds "" rather than arrows pointing at walls or blocked cells.
#
# Calls valueGrid, so the legend lines it prints appear as a side effect.
#
# NOTE: this has the same signature as the 4-move greedyArrowGrid defined
# earlier in the file, so once it is evaluated it replaces that method.
function greedyArrowGrid(val::Array{Float64, 1}, g::gridworld)
  # Offset of each move and the arrow drawn for it. The first coordinate is the
  # row of the displayed matrix, so -1 on it is "up"; -1 on the second is "left".
  moves = [([-1, 0], "↑"), ([1, 0], "↓"), ([0, -1], "←"), ([0, 1], "→"),
           ([-1, -1], "↖"), ([1, -1], "↙"), ([1, 1], "↘"), ([-1, 1], "↗")]
  valGrid = valueGrid(val, g)
  arrowGrid = Array(Any, g.width, g.height)
  gridStates = states(g)
  for s in gridStates
    i, j = s
    # exit cells get their "Exit" label in the loop at the end
    if s in g.exitStates
      arrowGrid[i,j] = ""
      continue
    end
    # only neighbours that are free cells inside the grid are candidates
    reachable = filter(m -> (s + m[1]) in gridStates, moves)
    if isempty(reachable)
      arrowGrid[i,j] = ""
      continue
    end
    nbsVals = [round(valGrid[(s + m[1])...], 2) for m in reachable]
    maxVal = maximum(nbsVals)
    best = find(v -> v == maxVal, nbsVals)
    arrowGrid[i,j] = join([m[2] for m in reachable[best]])
  end
  for x in g.blockedStates
    arrowGrid[x...] = "Blocked"
  end
  for x in g.exitStates
    arrowGrid[x...] = "Exit"
  end
  return arrowGrid
end
;



In [44]:
# 4x4 grid with two blocked cells and the exit at [3,1]
g = gridworld(4,4, [[2,2], [3,3]], [[3, 1]])
# evaluate the 8-neighbour random policy with 1000 sweeps
val = policyEval(randomPolicy2, g, niter = 1000)
valueGrid(val, g)

Blocked: *
Exit states: [3,1]


4×4 Array{Any,2}:
 -19.2104   -21.6806  -25.1481  -25.7649
 -14.7402      "*"    -23.6237  -25.5231
   0.0      -14.0698     "*"    -23.5556
  -9.56495  -11.625   -18.8652  -22.2104

In [45]:
# greedy moves (diagonals included) for the values computed in the previous cell
greedyArrowGrid(val, g)

Blocked: *
Exit states: [3,1]


4×4 Array{Any,2}:
 "↓"     "↙"        "←"        "↙"
 "↓"     "Blocked"  "↙"        "↓"
 "Exit"  "←"        "Blocked"  "↙"
 "↑"     "↖"        "←"        "←"

In [46]:
# open 6x6 grid (no blocked cells) with the exit in the corner [6,6]
g = gridworld(6,6, [], [[6, 6]])
# evaluate the 8-neighbour random policy with 1000 sweeps
val = policyEval(randomPolicy2, g, niter = 1000)
greedyArrowGrid(val, g)

Blocked: *
Exit states: [6,6]


6×6 Array{Any,2}:
 "↘"  "↘"  "↘"  "↘"  "↘"  "↓"   
 "↘"  "↘"  "↘"  "↘"  "↘"  "↓"   
 "↘"  "↘"  "↘"  "↘"  "↘"  "↓"   
 "↘"  "↘"  "↘"  "↘"  "↘"  "↓"   
 "↘"  "↘"  "↘"  "↘"  "↘"  "↓"   
 "→"  "→"  "→"  "→"  "→"  "Exit"

In [48]:
# 9x11 grid with three blocked cells and three exits
g = gridworld(9,11, [[2,2], [3,3], [6,3]], [[3, 1], [6,2], [1,11]])
# NOTE(review): this evaluates randomPolicy (4-neighbour moves) with the default
# niter, while the two preceding cells use randomPolicy2 with niter = 1000 and
# the arrows below are drawn with diagonal moves - confirm this is intended.
val = policyEval(randomPolicy, g)
greedyArrowGrid(val, g)

Blocked: *
Exit states: [3,1], [6,2], [1,11]


9×11 Array{Any,2}:
 "↓"     "↙"        "←"        "←"  "←"  "→"  "→"  "→"  "→"  "→"  "Exit"
 "↓"     "Blocked"  "↙"        "↖"  "↖"  "↗"  "↗"  "↗"  "↗"  "↗"  "↑"   
 "Exit"  "←"        "Blocked"  "↙"  "↙"  "↙"  "↗"  "↗"  "↗"  "↗"  "↑"   
 "↑"     "↖"        "↖"        "↙"  "←"  "←"  "↗"  "↗"  "↗"  "↗"  "↑"   
 "↘"     "↓"        "↙"        "←"  "↖"  "↖"  "↖"  "↗"  "↗"  "↗"  "↑"   
 "→"     "Exit"     "Blocked"  "↖"  "↖"  "↖"  "↖"  "↖"  "↗"  "↗"  "↑"   
 "↗"     "↑"        "↖"        "←"  "←"  "↖"  "↖"  "↖"  "↖"  "↗"  "↑"   
 "↑"     "↖"        "↖"        "↖"  "↖"  "↖"  "↖"  "↖"  "↖"  "↖"  "↑"   
 "↑"     "↖"        "↖"        "↖"  "↖"  "↖"  "↖"  "↖"  "↖"  "↖"  "↖"   