In [34]:
workspace()
include("PixelArts/PixelArts.jl")
include("RL/RL.jl")
using RL
using PixelArts

In [29]:
# Grid-world environment: bundles the state space, the dynamics closures and the
# board layout. Instances are assembled by `rand_grid_adventure`; the fields are
# filled positionally, so their order matters.
type grid_adventure <: AbstractRLEnv
    states      # [grid cells, coin on/off combinations, bomb on/off combinations]
    actions     # function: state -> list of action names
    step        # function: (state, action) -> (new_state, reward)
    task_end    # function: state -> true when the state's position is an exit
    exits       # [i, j] cells that are exits
    blocks      # [i, j] cells that cannot be entered
    coins       # [i, j] cells holding a coin
    bombs       # [i, j] cells holding a bomb
    array       # height x width matrix whose (i, j) entry is the cell [i, j]
end

"""
    expand_grid(set::Array, k::Int)

Return every ordered length-`k` combination (with repetition) of the elements of
`set`, each as a vector, with the first position varying slowest.

For `k == 1` the result is `set` itself, so its elements are returned as they are
rather than wrapped in one-element vectors.

Raises an error when `k` is not positive.
"""
function expand_grid(set::Array, k::Int)
    if k <= 0 error("k must be greater than zero") end
    k == 1 && return set
    # Expand the shorter combinations once. Calling `expand_grid` inside the
    # comprehension would redo the whole recursion for every element of `set`.
    tails = expand_grid(set, k - 1)
    return [[s; x] for s in set for x in tails]
end

"""
    rand_grid_adventure(height, width; nexits = 1, nblocks = 0, ncoins = 0, nbombs = 0)

Build a `grid_adventure` on a `height` x `width` board whose exits, blocks, coins and
bombs sit on distinct cells drawn at random.

A state is `[position, coin_flags, bomb_flags]`: `position` is an `[i, j]` cell and each
flag vector marks which coins / bombs are still active (nonzero means active).

The returned environment carries three closures:

- `actions(s)`: the four moves `"up"`, `"left"`, `"right"`, `"down"`.
- `task_end(s)`: `true` when the position of `s` is an exit.
- `step(state, action)`: returns `(new_state, reward)`. A move that would leave the
  board or enter a block leaves the position unchanged. The reward is `-1` by default,
  `50` on an active coin, `-100` on an active bomb (the coin or bomb is then switched
  off in `new_state`) and `0` on an exit. `state` itself is not modified.

Raises an error when `nexits < 1`, when a dimension is smaller than 1, when a count is
negative, or when the board has fewer cells than items to place.
"""
function rand_grid_adventure(height::Int, width::Int; nexits = 1, nblocks = 0, ncoins = 0, nbombs = 0)
    nexits < 1 && error("there must be at least one exit")
    (height < 1 || width < 1) && error("height and width must be at least 1")
    # A negative count would shift the index ranges below so that they overlap the exits.
    (nblocks < 0 || ncoins < 0 || nbombs < 0) && error("nblocks, ncoins and nbombs must not be negative")
    h, w = height, width
    tot = nexits + nblocks + ncoins + nbombs
    # Every item needs its own cell; without this check the slice of the permutation
    # below fails with an opaque BoundsError.
    tot > h * w && error("cannot place $tot items on a $h x $w grid")
    array = [[i,j] for i in 1:h, j in 1:w]
    grid = [x for x in array]
    # One random permutation of the cell indices, cut into consecutive runs, keeps the
    # four kinds of item on disjoint cells.
    idx = randperm(h * w)[1:tot]
    exits = grid[idx[1:nexits]]
    blocks = nblocks > 0 ? grid[idx[(nexits + 1):(nexits + nblocks)]] : []
    coins = ncoins > 0 ? grid[idx[(nexits + nblocks + 1):(nexits + nblocks + ncoins)]] : []
    bombs = nbombs > 0 ? grid[idx[((nexits + nblocks + ncoins + 1):tot)]] : []
    active_coins_states = (ncoins <= 0) ? [] : expand_grid([false, true], ncoins)
    active_bombs_states = (nbombs <= 0) ? [] : expand_grid([false , true], nbombs)
    states = [grid, active_coins_states, active_bombs_states]
    actions(s) = ["up", "left", "right", "down"]
    task_end(s) = (s[1] in exits)
    function step(state, action)
        i, j = state[1]
        # Cells of the coins / bombs whose flag is still set in this state.
        active_coins = coins[find(state[2])]
        active_bombs = bombs[find(state[3])]
        # Apply the move only if it stays on the board and the target is not a block.
        if (action == "up" && i > 1 && [i - 1, j] ∉ blocks)
            i = i - 1
        elseif (action == "down" && i < h && [i + 1, j] ∉ blocks)
            i = i + 1
        elseif (action == "left" && j > 1 && [i, j - 1] ∉ blocks)
            j = j - 1
        elseif (action == "right" && j < w && [i, j + 1] ∉ blocks)
            j = j + 1
        end
        # Work on a deep copy so the caller's state (and its flag vectors) stay intact.
        new_state = deepcopy(state)
        new_state[1] = [i, j]
        reward = -1 # default
        if ([i, j] ∈ active_coins)
            reward = 50
            # Switch the collected coin off.
            coinidx = find([x == [i,j] for x in coins])
            new_state[2][coinidx] = 0
        end
        if ([i, j] ∈ active_bombs)
            reward = -100
            # Switch the triggered bomb off.
            bombidx = find([x == [i,j] for x in bombs])
            new_state[3][bombidx] = 0
        end
        if ([i, j] ∈ exits)
            reward = 0
        end
        return new_state, reward
    end
    return grid_adventure(states, actions, step, task_end, exits, blocks, coins, bombs, array)
end

    
"""
    render(g::grid_adventure, start_pos, height = 400, width = 400)

Draw the board of `g` on a new canvas and return the canvas id.

The canvas id is `"grid_adventure"` followed by three random characters. Every cell
is drawn as a white pixel, then one image is added per exit, bomb, coin and block
(in that order) under the id `<kind><i>_<j>`, and finally a blue cross with id
`<canvas id>racer` is placed at `start_pos`. `height` and `width` are forwarded to
`create_canvas` after the grid dimensions.
"""
function render(g::grid_adventure, start_pos, height = 400, width = 400)
    nrows, ncols = size(g.array)
    canvas_id = "grid_adventure" * randstring(3)
    create_canvas(canvas_id, nrows, ncols, height, width)
    cells = [[i, j] for i in 1:nrows for j in 1:ncols]
    markup = add_pixels(
        canvas_id, cells,
        attr = Dict("width" => 1, "height" => 1),
        style = Dict("fill" => "white", "stroke-width" => 0.1, "stroke" => "black"),
        disp = false)
    # (id prefix, cells, image file) for each kind of item, in drawing order.
    layers = (
        ("exit", g.exits, "RL/files/Images/exit.png"),
        ("bomb", g.bombs, "RL/files/Images/bomb.png"),
        ("coin", g.coins, "RL/files/Images/coin.png"),
        ("block", g.blocks, "RL/files/Images/wall.png"),
    )
    for (prefix, spots, image) in layers
        for spot in spots
            sprite_id = string(prefix, join(spot, "_"))
            markup = markup * add_pixel_image(sprite_id, canvas_id, spot..., image, disp = false)
        end
    end
    # Everything built with `disp = false` is accumulated and handed to `jsdisp` once.
    jsdisp(markup)
    add_pixel_cross(canvas_id * "racer", canvas_id, start_pos..., "blue")
    return canvas_id
end

"""
    animate(g::grid_adventure, canvas_id::String, states; speed = 0.1, max_time = 60)

Replay `states` on the canvas `canvas_id`.

For each state, in order, the element `<canvas_id>racer` is moved to the state's
position and the image of every coin or bomb whose flag is `0` is removed. Each image
is removed only once, the first time its flag is seen at `0`. The function then
sleeps for `speed` seconds. Playback stops after the last state or once the
accumulated wall-clock time reaches `max_time` seconds.
"""
function animate(g::grid_adventure, canvas_id::String, states; speed = 0.1, max_time = 60)
    racer_id = canvas_id * "racer"
    # Ids already removed, so that a spent coin / bomb is not removed again on every
    # later frame.
    removed = Set{String}()
    function clear_spent(prefix, cells, flags)
        for (k, flag) in enumerate(flags)
            if flag == 0
                id = string(prefix, join(cells[k], "_"))
                if !(id in removed)
                    remove_element_by_id(id)
                    push!(removed, id)
                end
            end
        end
    end
    n = size(states, 1)
    elapsed = 0.
    i = 1
    while i <= n && elapsed < max_time
        tic()
        pos, coin_flags, bomb_flags = states[i][1], states[i][2], states[i][3]
        translate_element(racer_id, pos...)
        clear_spent("coin", g.coins, coin_flags)
        clear_spent("bomb", g.bombs, bomb_flags)
        sleep(speed)
        # The frame time includes the sleep, so `max_time` bounds the total duration.
        elapsed += toq()
        i += 1
    end
end

"""
    init_state(g::grid_adventure, pos = [1, 1])

Return the state `[pos, coin_flags, bomb_flags]` in which every coin and every bomb
of `g` is flagged `true`.
"""
function init_state(g::grid_adventure, pos = [1, 1])
    coin_flags = map(c -> true, g.coins)
    bomb_flags = map(b -> true, g.bombs)
    return [pos, coin_flags, bomb_flags]
end

init_state (generic function with 2 methods)

In [30]:
# Sample a 15 x 15 board and print where each kind of item landed.
g = rand_grid_adventure(15, 15; nexits = 2, nblocks = 10, ncoins = 10, nbombs = 10)
println("blocks: ", g.blocks)
println("coins: ", g.coins)
println("bombs: ", g.bombs)
println("exits: ", g.exits)

blocks: Array{Int64,1}[[15, 3], [8, 12], [9, 4], [2, 9], [9, 1], [3, 4], [4, 15], [15, 2], [4, 9], [11, 13]]
coins: Array{Int64,1}[[15, 1], [2, 8], [9, 8], [3, 3], [8, 2], [6, 14], [10, 3], [2, 7], [10, 2], [3, 13]]
bombs: Array{Int64,1}[[1, 6], [2, 12], [5, 12], [7, 2], [7, 12], [1, 14], [2, 14], [7, 7], [5, 8], [11, 8]]
exits: Array{Int64,1}[[7, 15], [6, 7]]


In [31]:
# Draw the board with the agent's marker at cell [1, 1]; `render` returns the canvas
# id, which is kept so the canvas can be animated later.
gcanvas = render(g, [1,1])

"grid_adventureS0P"

In [32]:
# Roll out one episode (at most 1000 steps) under a uniformly random policy, recording
# every visited state and every reward, then replay the trajectory on the canvas.
policy(s) = rand(g.actions(s))
state = init_state(g, [1,1])
agent = RLAgent(policy, state)
states, rewards = [state], Int[]
for i in 1:1000
    # An exit is terminal: stop instead of letting the agent wander off it again.
    g.task_end(state) && break
    state, reward = g.step(state, agent.policy(state))
    # Append in place; rebuilding both vectors by concatenation on every step copies
    # the whole history each time.
    push!(states, state)
    push!(rewards, reward)
end
animate(g, gcanvas, states)

In [28]:
# Display the recorded trajectory: a vector of [position, coin flags, bomb flags] states.
states

1001-element Array{Array{Array{Int64,1},1},1}:
 Array{Int64,1}[[1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{Int64,1}[[2, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{Int64,1}[[3, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{Int64,1}[[3, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{Int64,1}[[2, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{Int64,1}[[2, 2], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{Int64,1}[[2, 3], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{Int64,1}[[2, 2], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
 Array{In